diff --git a/accumulo-handler/src/test/results/positive/accumulo_queries.q.out b/accumulo-handler/src/test/results/positive/accumulo_queries.q.out index de82857..f92fcf1 100644 --- a/accumulo-handler/src/test/results/positive/accumulo_queries.q.out +++ b/accumulo-handler/src/test/results/positive/accumulo_queries.q.out @@ -41,6 +41,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-4 depends on stages: Stage-2, Stage-1, Stage-3 Stage-1 is a root stage Stage-3 is a root stage @@ -57,6 +58,13 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.accumulo_table_1 + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -82,6 +90,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat serde: org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe name: default.accumulo_table_1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE accumulo_table_1 SELECT * WHERE (key%2)=0 PREHOOK: type: QUERY @@ -501,9 +535,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-6 depends on stages: Stage-2, Stage-1, Stage-4 Stage-1 is a root stage - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 + Stage-5 is a root stage + Stage-3 depends on stages: Stage-5 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-0 @@ -518,11 +554,18 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value, count + Column Types: int, string, int + Table: default.accumulo_table_3 + Stage: Stage-1 Pre Insert operator: Pre-Insert task - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -604,6 +647,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat serde: org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe name: default.accumulo_table_3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, count + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE accumulo_table_3 SELECT x.key, x.value, Y.count diff --git a/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out b/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out index 6621a4e..50a7580 100644 --- a/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out +++ b/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out @@ -35,6 +35,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-12 depends on stages: Stage-2, Stage-1, Stage-5, Stage-11 + Stage-13 depends on stages: Stage-2, Stage-1, Stage-5, Stage-11 Stage-1 is a root stage Stage-4 is a root stage Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 @@ -44,6 +46,7 @@ STAGE DEPENDENCIES: Stage-6 Stage-8 Stage-9 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-0 @@ -58,6 +61,20 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_x1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_x2 + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -83,6 +100,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 + Select Operator + expressions: _col0 (type: string), '' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 50) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -98,6 +128,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat serde: org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe name: default.src_x2 + Select Operator + expressions: _col0 (type: string), '' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-10 Conditional Operator @@ -151,6 +209,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src a insert overwrite table src_x1 select key,"" where a.key > 0 AND a.key < 50 diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes.q.out index 6876ca8..b02bc17 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -72,6 +73,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -95,6 +122,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out index 79cf8fe..db70573 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -68,6 +69,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -91,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: smallint, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out index fec58ef..40c2dbc 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -68,6 +69,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -91,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out index 1131478..c75fa3b 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -77,6 +78,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +112,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM src SELECT TRANSFORM(cast(src.key as tinyint), src.value) ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe' diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out index 8d3b95e..427286a 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -72,6 +73,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -95,6 +122,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/data/conf/hive-site.xml b/data/conf/hive-site.xml index 62364fe..4985803 100644 --- a/data/conf/hive-site.xml +++ b/data/conf/hive-site.xml @@ -302,12 +302,15 @@ true - hive.llap.io.allocator.direct false + + hive.stats.column.autogather + true + hive.materializedview.rewriting diff --git a/hbase-handler/src/test/results/positive/hbase_queries.q.out b/hbase-handler/src/test/results/positive/hbase_queries.q.out index 276b6b8..035e62e 100644 --- a/hbase-handler/src/test/results/positive/hbase_queries.q.out +++ b/hbase-handler/src/test/results/positive/hbase_queries.q.out @@ -41,6 +41,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-4 depends on stages: Stage-2, Stage-1, Stage-3 Stage-1 is a root stage Stage-3 is a root stage @@ -57,6 +58,13 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.hbase_table_1 + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -82,6 +90,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.hbase_table_1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE hbase_table_1 SELECT * WHERE (key%2)=0 PREHOOK: type: QUERY @@ -504,9 +538,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-6 depends on stages: Stage-2, Stage-1, Stage-4 Stage-1 is a root stage - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 + Stage-5 is a root stage + Stage-3 depends on stages: Stage-5 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-0 @@ -521,11 +557,18 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value, count + Column Types: int, string, int + Table: default.hbase_table_3 + Stage: Stage-1 Pre Insert operator: Pre-Insert task - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -607,6 +650,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.hbase_table_3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, count + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE hbase_table_3 SELECT x.key, x.value, Y.count diff --git a/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out b/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out index 68a417d..a8ba3f6 100644 --- a/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out +++ b/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out @@ -35,6 +35,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-12 depends on stages: Stage-2, Stage-1, Stage-5, Stage-11 + Stage-13 depends on stages: Stage-2, Stage-1, Stage-5, Stage-11 Stage-1 is a root stage Stage-4 is a root stage Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 @@ -44,6 +46,7 @@ STAGE DEPENDENCIES: Stage-6 Stage-8 Stage-9 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-0 @@ -58,6 +61,20 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_x1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_x2 + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -83,6 +100,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 + Select Operator + expressions: _col0 (type: string), '' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 50) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -98,6 +128,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.src_x2 + Select Operator + expressions: _col0 (type: string), '' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-10 Conditional Operator @@ -151,6 +209,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src a insert overwrite table src_x1 select key,"" where a.key > 0 AND a.key < 50 diff --git a/hbase-handler/src/test/results/positive/hbasestats.q.out b/hbase-handler/src/test/results/positive/hbasestats.q.out index bf902e4..9909342 100644 --- a/hbase-handler/src/test/results/positive/hbasestats.q.out +++ b/hbase-handler/src/test/results/positive/hbasestats.q.out @@ -64,6 +64,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-4 depends on stages: Stage-2, Stage-1, Stage-3 Stage-1 is a root stage Stage-3 is a root stage @@ -80,6 +81,13 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, state, country, country_id + Column Types: string, string, string, int + Table: default.users + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -102,6 +110,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.users + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + outputColumnNames: key, state, country, country_id + Statistics: Num rows: 500 Data size: 133000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(state, 16), compute_stats(country, 16), compute_stats(country_id, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE users SELECT 'user1', 'IA', 'USA', 0 FROM src PREHOOK: type: QUERY @@ -131,6 +165,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"country\":\"true\",\"country_id\":\"true\",\"key\":\"true\",\"state\":\"true\"}} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -196,6 +231,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"country\":\"true\",\"country_id\":\"true\",\"key\":\"true\",\"state\":\"true\"}} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -252,7 +288,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"country\":\"true\",\"country_id\":\"true\",\"key\":\"true\",\"state\":\"true\"}} #### A masked pattern was here #### numFiles 0 numRows 2 @@ -323,6 +359,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"country\":\"true\",\"country_id\":\"true\",\"key\":\"true\",\"state\":\"true\"}} #### A masked pattern was here #### numFiles 0 numRows 2 diff --git a/ql/src/test/queries/clientpositive/bucket_num_reducers.q b/ql/src/test/queries/clientpositive/bucket_num_reducers.q index 06f334e..5c5008e 100644 --- a/ql/src/test/queries/clientpositive/bucket_num_reducers.q +++ b/ql/src/test/queries/clientpositive/bucket_num_reducers.q @@ -1,4 +1,4 @@ -; +set hive.stats.column.autogather=false; set hive.exec.mode.local.auto=false; set mapred.reduce.tasks = 10; diff --git a/ql/src/test/queries/clientpositive/combine1.q b/ql/src/test/queries/clientpositive/combine1.q index 3bcb8b1..b300830 100644 --- a/ql/src/test/queries/clientpositive/combine1.q +++ b/ql/src/test/queries/clientpositive/combine1.q @@ -7,6 +7,8 @@ set mapred.max.split.size=256; set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec; +set hive.stats.column.autogather=false; + -- SORT_QUERY_RESULTS create table combine1_1(key string, value string) stored as textfile; diff --git a/ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q b/ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q index 4dcea1f..7159ad5 100644 --- a/ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q +++ b/ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; --SORT_QUERY_RESULTS -- Java JCE must be installed in order to hava a key length of 256 bits diff --git a/ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q b/ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q index 6824c1c..c0ddb8b 100644 --- a/ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q +++ b/ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.exec.infer.bucket.sort=true; set hive.exec.infer.bucket.sort.num.buckets.power.two=true; diff --git a/ql/src/test/queries/clientpositive/orc_wide_table.q b/ql/src/test/queries/clientpositive/orc_wide_table.q index 422a3c2..d2ec385 100644 --- a/ql/src/test/queries/clientpositive/orc_wide_table.q +++ b/ql/src/test/queries/clientpositive/orc_wide_table.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +set hive.stats.column.autogather=false; drop table if exists test_txt; drop table if exists test_orc; create table test_txt( diff --git a/ql/src/test/queries/clientpositive/udf_round_2.q b/ql/src/test/queries/clientpositive/udf_round_2.q index 43988c1..38885a9 100644 --- a/ql/src/test/queries/clientpositive/udf_round_2.q +++ b/ql/src/test/queries/clientpositive/udf_round_2.q @@ -1,4 +1,5 @@ set hive.fetch.task.conversion=more; +set hive.stats.column.autogather=false; -- test for NaN (not-a-number) create table tstTbl1(n double); diff --git a/ql/src/test/results/clientnegative/fileformat_void_input.q.out b/ql/src/test/results/clientnegative/fileformat_void_input.q.out index 6043258..538e8f4 100644 --- a/ql/src/test/results/clientnegative/fileformat_void_input.q.out +++ b/ql/src/test/results/clientnegative/fileformat_void_input.q.out @@ -10,16 +10,4 @@ POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1 -POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -FAILED: SemanticException 3:20 Input format must implement InputFormat. Error encountered near token 'dest1' +FAILED: SemanticException 1:70 Input format must implement InputFormat. Error encountered near token 'dest1' diff --git a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out index 12dcc11..fc61bc6 100644 --- a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out +++ b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out @@ -79,7 +79,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -178,7 +178,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -277,7 +277,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -376,7 +376,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -475,7 +475,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 @@ -574,7 +574,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 @@ -673,7 +673,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 @@ -772,7 +772,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 @@ -871,7 +871,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out index 944482c..5b9ba69 100644 --- a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out +++ b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out @@ -88,7 +88,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -145,7 +145,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 12 numRows 500 rawDataSize 5312 @@ -275,7 +275,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 12 numRows 500 rawDataSize 5312 @@ -364,7 +364,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} #### A masked pattern was here #### numFiles 12 numRows 500 diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out index 426a4de..6173125 100644 --- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out +++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out @@ -232,7 +232,7 @@ STAGE PLANS: dt 100 ts 3.0 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -278,7 +278,7 @@ STAGE PLANS: dt 100 ts 6.30 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -469,7 +469,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -538,7 +538,7 @@ STAGE PLANS: partcol1 2 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol diff --git a/ql/src/test/results/clientpositive/alter_table_add_partition.q.out b/ql/src/test/results/clientpositive/alter_table_add_partition.q.out index ae6624f..5c7c809 100644 --- a/ql/src/test/results/clientpositive/alter_table_add_partition.q.out +++ b/ql/src/test/results/clientpositive/alter_table_add_partition.q.out @@ -199,7 +199,7 @@ Database: default Table: mp #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}} numFiles 1 numRows 1 rawDataSize 1 diff --git a/ql/src/test/results/clientpositive/alter_table_serde2.q.out b/ql/src/test/results/clientpositive/alter_table_serde2.q.out index ae0ef54..324859d 100644 --- a/ql/src/test/results/clientpositive/alter_table_serde2.q.out +++ b/ql/src/test/results/clientpositive/alter_table_serde2.q.out @@ -79,7 +79,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -179,7 +179,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out b/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out index 2bfc04a..3a47edb 100644 --- a/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out +++ b/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out @@ -102,7 +102,7 @@ STAGE PLANS: partition values: age 15 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}} bucket_count -1 column.name.delimiter , columns name @@ -147,7 +147,7 @@ STAGE PLANS: partition values: age 30 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}} bucket_count -1 column.name.delimiter , columns name @@ -192,7 +192,7 @@ STAGE PLANS: partition values: age 40 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}} bucket_count -1 column.name.delimiter , columns name @@ -237,7 +237,7 @@ STAGE PLANS: partition values: age __HIVE_DEFAULT_PARTITION__ properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}} bucket_count -1 column.name.delimiter , columns name diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out index e22c3ef..bcb1dd2 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -66,11 +66,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select * from loc_orc where state='OH' @@ -87,17 +87,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (state = 'OH') (type: boolean) - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'OH' (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index a8e4854..150e7f8 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -66,11 +66,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: analyze table loc_orc compute statistics for columns state @@ -106,22 +106,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: state (type: string), locid (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -129,13 +129,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col1) keys: _col0 (type: string), _col2 (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -151,7 +151,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int) Reduce Operator Tree: Group By Operator @@ -159,10 +159,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -743,30 +743,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), zip (type: bigint) outputColumnNames: state, zip - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), zip (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/auto_join1.q.out b/ql/src/test/results/clientpositive/auto_join1.q.out index 5f4bb74..dbd49a5 100644 --- a/ql/src/test/results/clientpositive/auto_join1.q.out +++ b/ql/src/test/results/clientpositive/auto_join1.q.out @@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -44,7 +46,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,6 +110,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join14.q.out b/ql/src/test/results/clientpositive/auto_join14.q.out index 1dd677c..affad1e 100644 --- a/ql/src/test/results/clientpositive/auto_join14.q.out +++ b/ql/src/test/results/clientpositive/auto_join14.q.out @@ -15,13 +15,15 @@ FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src @@ -44,7 +46,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c1, c2 + Statistics: Num rows: 366 Data size: 3890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,6 +110,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join17.q.out b/ql/src/test/results/clientpositive/auto_join17.q.out index d39c36e..87158b7 100644 --- a/ql/src/test/results/clientpositive/auto_join17.q.out +++ b/ql/src/test/results/clientpositive/auto_join17.q.out @@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -44,7 +46,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(value1, 16), compute_stats(key2, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,6 +110,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join19.q.out b/ql/src/test/results/clientpositive/auto_join19.q.out index 3f70055..d492a03 100644 --- a/ql/src/test/results/clientpositive/auto_join19.q.out +++ b/ql/src/test/results/clientpositive/auto_join19.q.out @@ -17,13 +17,15 @@ INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src2 @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +81,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +112,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') diff --git a/ql/src/test/results/clientpositive/auto_join19_inclause.q.out b/ql/src/test/results/clientpositive/auto_join19_inclause.q.out index 3f70055..d492a03 100644 --- a/ql/src/test/results/clientpositive/auto_join19_inclause.q.out +++ b/ql/src/test/results/clientpositive/auto_join19_inclause.q.out @@ -17,13 +17,15 @@ INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src2 @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +81,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +112,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') diff --git a/ql/src/test/results/clientpositive/auto_join2.q.out b/ql/src/test/results/clientpositive/auto_join2.q.out index b17d344..0c3fa26 100644 --- a/ql/src/test/results/clientpositive/auto_join2.q.out +++ b/ql/src/test/results/clientpositive/auto_join2.q.out @@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -62,7 +64,7 @@ STAGE PLANS: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -103,6 +105,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -119,6 +136,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join25.q.out b/ql/src/test/results/clientpositive/auto_join25.q.out index 534bdb6..77cfe0f 100644 --- a/ql/src/test/results/clientpositive/auto_join25.q.out +++ b/ql/src/test/results/clientpositive/auto_join25.q.out @@ -33,11 +33,13 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-6:CONDITIONAL -RUN: Stage-7:MAPREDLOCAL +RUN: Stage-7:CONDITIONAL +RUN: Stage-8:MAPREDLOCAL RUN: Stage-1:MAPRED RUN: Stage-0:MOVE +RUN: Stage-3:MAPRED RUN: Stage-2:STATS +RUN: Stage-10:COLUMNSTATS PREHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -64,8 +66,6 @@ PREHOOK: Input: default@src PREHOOK: Output: default@dest_j2 FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask -FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value POSTHOOK: type: QUERY @@ -73,14 +73,16 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_j2 POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j2.value SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-11:CONDITIONAL -RUN: Stage-14:MAPREDLOCAL +RUN: Stage-12:CONDITIONAL +RUN: Stage-15:MAPREDLOCAL RUN: Stage-1:MAPRED -RUN: Stage-8:CONDITIONAL -RUN: Stage-12:MAPREDLOCAL -RUN: Stage-2:MAPRED +RUN: Stage-9:CONDITIONAL +RUN: Stage-13:MAPREDLOCAL +RUN: Stage-7:MAPRED RUN: Stage-0:MOVE +RUN: Stage-4:MAPRED RUN: Stage-3:STATS +RUN: Stage-17:COLUMNSTATS PREHOOK: query: SELECT sum(hash(dest_j2.key,dest_j2.value)) FROM dest_j2 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j2 @@ -114,11 +116,13 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-6:CONDITIONAL -RUN: Stage-7:MAPREDLOCAL +RUN: Stage-7:CONDITIONAL +RUN: Stage-8:MAPREDLOCAL RUN: Stage-1:MAPRED RUN: Stage-0:MOVE +RUN: Stage-3:MAPRED RUN: Stage-2:STATS +RUN: Stage-10:COLUMNSTATS PREHOOK: query: SELECT sum(hash(dest_j1.key,dest_j1.value)) FROM dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 diff --git a/ql/src/test/results/clientpositive/auto_join26.q.out b/ql/src/test/results/clientpositive/auto_join26.q.out index b05145d..50340cd 100644 --- a/ql/src/test/results/clientpositive/auto_join26.q.out +++ b/ql/src/test/results/clientpositive/auto_join26.q.out @@ -15,13 +15,15 @@ INSERT OVERWRITE TABLE dest_j1 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 + Stage-7 is a root stage + Stage-2 depends on stages: Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_0:x @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest_j1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join3.q.out b/ql/src/test/results/clientpositive/auto_join3.q.out index 35e8273..d230ae9 100644 --- a/ql/src/test/results/clientpositive/auto_join3.q.out +++ b/ql/src/test/results/clientpositive/auto_join3.q.out @@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-5 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -64,7 +66,7 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -99,6 +101,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -115,6 +132,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join4.q.out b/ql/src/test/results/clientpositive/auto_join4.q.out index eacf056..a24687d 100644 --- a/ql/src/test/results/clientpositive/auto_join4.q.out +++ b/ql/src/test/results/clientpositive/auto_join4.q.out @@ -37,13 +37,15 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src2 @@ -66,7 +68,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -99,6 +101,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -115,6 +132,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/auto_join5.q.out b/ql/src/test/results/clientpositive/auto_join5.q.out index d526595..8df09ca 100644 --- a/ql/src/test/results/clientpositive/auto_join5.q.out +++ b/ql/src/test/results/clientpositive/auto_join5.q.out @@ -37,13 +37,15 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -66,7 +68,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -99,6 +101,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -115,6 +132,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/auto_join6.q.out b/ql/src/test/results/clientpositive/auto_join6.q.out index 53caf7d..d661908 100644 --- a/ql/src/test/results/clientpositive/auto_join6.q.out +++ b/ql/src/test/results/clientpositive/auto_join6.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/auto_join7.q.out b/ql/src/test/results/clientpositive/auto_join7.q.out index aec4e82..8a23e3d 100644 --- a/ql/src/test/results/clientpositive/auto_join7.q.out +++ b/ql/src/test/results/clientpositive/auto_join7.q.out @@ -50,6 +50,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -126,6 +128,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -140,6 +157,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6 + Column Types: int, string, int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/auto_join8.q.out b/ql/src/test/results/clientpositive/auto_join8.q.out index ccbafba..8f3a02b 100644 --- a/ql/src/test/results/clientpositive/auto_join8.q.out +++ b/ql/src/test/results/clientpositive/auto_join8.q.out @@ -37,13 +37,15 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src2 @@ -66,7 +68,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -102,6 +104,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -118,6 +135,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/auto_join9.q.out b/ql/src/test/results/clientpositive/auto_join9.q.out index d7d7d18..6474ef0 100644 --- a/ql/src/test/results/clientpositive/auto_join9.q.out +++ b/ql/src/test/results/clientpositive/auto_join9.q.out @@ -15,13 +15,15 @@ FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -44,7 +46,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,6 +110,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out index 156be41..a7508c2 100644 --- a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out +++ b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out @@ -141,7 +141,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -162,7 +162,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -182,7 +182,7 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [$hdt$_0:orderpayment, $hdt$_1:dim_pay_date] + /orderpayment_small [$hdt$_1:orderpayment, $hdt$_2:dim_pay_date] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -277,7 +277,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -298,7 +298,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -318,7 +318,7 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [$hdt$_2:deal] + /orderpayment_small [$hdt$_3:deal] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: @@ -414,7 +414,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -435,7 +435,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -455,7 +455,7 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [$hdt$_3:order_city] + /orderpayment_small [$hdt$_4:order_city] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: @@ -551,7 +551,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} bucket_count -1 column.name.delimiter , columns userid @@ -572,7 +572,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} bucket_count -1 column.name.delimiter , columns userid @@ -592,7 +592,7 @@ STAGE PLANS: name: default.user_small name: default.user_small Truncated Path -> Alias: - /user_small [$hdt$_4:user] + /user_small [$hdt$_0:user] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out index 8c7658c..eda6619 100644 --- a/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out +++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out @@ -70,8 +70,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -104,6 +107,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -114,6 +127,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -128,6 +165,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -141,6 +192,25 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 FROM tbl1 a JOIN tbl2 b @@ -247,8 +317,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -281,6 +354,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -291,6 +374,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -305,6 +412,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -318,6 +439,25 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 FROM tbl1 a JOIN tbl2 b @@ -424,8 +564,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -458,6 +601,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -468,6 +621,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -482,6 +659,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -495,6 +686,25 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 FROM tbl1 a JOIN tbl2 b diff --git a/ql/src/test/results/clientpositive/binary_output_format.q.out b/ql/src/test/results/clientpositive/binary_output_format.q.out index 96afeb6..c589541 100644 --- a/ql/src/test/results/clientpositive/binary_output_format.q.out +++ b/ql/src/test/results/clientpositive/binary_output_format.q.out @@ -54,6 +54,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -117,6 +118,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string) + outputColumnNames: mydata + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(mydata, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -168,6 +185,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -211,6 +257,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: mydata + Column Types: string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/bucket1.q.out b/ql/src/test/results/clientpositive/bucket1.q.out index 78571aa..68f0c2b 100644 --- a/ql/src/test/results/clientpositive/bucket1.q.out +++ b/ql/src/test/results/clientpositive/bucket1.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -129,6 +131,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -163,6 +192,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket1_1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucket1_1 select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/bucket2.q.out b/ql/src/test/results/clientpositive/bucket2.q.out index a8e9f90..fbc7ef7 100644 --- a/ql/src/test/results/clientpositive/bucket2.q.out +++ b/ql/src/test/results/clientpositive/bucket2.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -129,6 +130,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -163,6 +199,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket2_1 + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket2_1 select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/bucket3.q.out b/ql/src/test/results/clientpositive/bucket3.q.out index b1173e7..10ae348 100644 --- a/ql/src/test/results/clientpositive/bucket3.q.out +++ b/ql/src/test/results/clientpositive/bucket3.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -127,6 +129,34 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -160,6 +190,90 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket3_1 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out index 5478f12..eb6e44b 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out @@ -117,13 +117,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -197,7 +199,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -257,6 +259,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -393,6 +422,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -451,13 +557,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -531,7 +639,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -570,7 +678,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -591,6 +699,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -704,7 +839,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -727,6 +862,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out index e335683..52c2d71 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out @@ -101,13 +101,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:b @@ -181,7 +183,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 0 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -241,6 +243,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -377,6 +406,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -435,13 +541,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:b @@ -515,7 +623,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 0 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -554,7 +662,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -575,6 +683,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -688,7 +823,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -711,6 +846,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out index bd5a438..f27e85f 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out @@ -101,13 +101,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -181,7 +183,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -241,6 +243,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -377,6 +406,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -435,13 +541,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -515,7 +623,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -554,7 +662,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -575,6 +683,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -688,7 +823,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -711,6 +846,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out index 5743944..688fdfa 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out @@ -189,7 +189,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -212,7 +212,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -239,7 +239,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -262,7 +262,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -289,7 +289,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -312,7 +312,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -545,7 +545,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -568,7 +568,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -595,7 +595,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -618,7 +618,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -645,7 +645,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -668,7 +668,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key diff --git a/ql/src/test/results/clientpositive/bucketmapjoin13.q.out b/ql/src/test/results/clientpositive/bucketmapjoin13.q.out index 71b2924..406cca4 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin13.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin13.q.out @@ -91,7 +91,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -192,7 +192,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name value column.name.delimiter , @@ -241,7 +241,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -371,7 +371,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -480,7 +480,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -615,7 +615,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -724,7 +724,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -859,7 +859,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -968,7 +968,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out index febc2b6..e9a80cc 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out @@ -181,6 +181,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -272,6 +273,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 121 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -376,6 +393,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] /srcbucket_mapjoin_part/ds=2008-04-09 [b] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -418,6 +464,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -722,6 +776,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -792,7 +847,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -813,6 +868,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -917,6 +988,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -936,7 +1036,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -959,6 +1059,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -973,7 +1081,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1003,7 +1111,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1024,7 +1132,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1060,7 +1168,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1090,7 +1198,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1111,7 +1219,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out index ee075c1..163a802 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out @@ -82,6 +82,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -212,6 +213,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -265,6 +282,35 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -307,6 +353,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out index 2728bb7..7f21e60 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out @@ -91,6 +91,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -275,6 +276,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -328,6 +345,35 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -370,6 +416,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out index 165f0dc..132ec23 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -84,6 +85,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.key, x.value from ( @@ -153,6 +161,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -189,6 +198,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * from ( @@ -258,6 +274,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -294,6 +311,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.key+x.key, x.value from @@ -312,6 +336,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -343,6 +369,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -359,6 +401,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.k1, concat(x.v1, x.v1) from @@ -377,6 +455,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -413,3 +492,10 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out index c5e03be..d3502a1 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out @@ -44,6 +44,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -80,6 +81,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: value, key + Column Types: string, int + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.value, x.key from (SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x @@ -151,6 +159,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -182,6 +192,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), '1' (type: string) + outputColumnNames: value, key, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 16), compute_stats(key, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -198,6 +224,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: value, key + Column Types: string, int + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.key, x.value from (SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out index 1d794c3..d581f24 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out @@ -65,39 +65,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -106,7 +81,7 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -121,8 +96,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) value expressions: _col2 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -134,6 +107,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -150,98 +137,37 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col7) (type: string) - outputColumnNames: _col1, _col2 - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - value expressions: _col2 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col7) (type: string) - outputColumnNames: _col1, _col2 - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - value expressions: _col2 (type: string) + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.key, concat(a.value, b.value) @@ -339,43 +265,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -388,7 +285,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -400,8 +297,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: int) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) @@ -413,6 +308,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -429,104 +338,37 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: int) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.value diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out index 1e70105..fb90070 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out @@ -65,43 +65,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -114,7 +85,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -129,8 +100,6 @@ STAGE PLANS: sort order: - Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -142,6 +111,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -158,110 +141,37 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col4 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col4) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col4 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col4) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -346,43 +256,14 @@ JOIN ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:test_table2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:test_table2 - TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -395,7 +276,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -410,8 +291,6 @@ STAGE PLANS: sort order: - Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -423,6 +302,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -439,110 +332,37 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:test_table1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:test_table1 - TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col3) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col3) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out index f3d3006..29f8896 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out @@ -68,6 +68,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -116,6 +117,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, b.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -197,6 +205,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -245,6 +254,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT b.key, a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b diff --git a/ql/src/test/results/clientpositive/case_sensitivity.q.out b/ql/src/test/results/clientpositive/case_sensitivity.q.out index b3969cc..0158826 100644 --- a/ql/src/test/results/clientpositive/case_sensitivity.q.out +++ b/ql/src/test/results/clientpositive/case_sensitivity.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 837 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/cast1.q.out b/ql/src/test/results/clientpositive/cast1.q.out index 9feb14f..b0bab3d 100644 --- a/ql/src/test/results/clientpositive/cast1.q.out +++ b/ql/src/test/results/clientpositive/cast1.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -44,6 +45,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: boolean), _col6 (type: int) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3348 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3372 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3372 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,6 +94,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7 + Column Types: int, double, double, double, int, string, int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out index f260f03..d94cf30 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out @@ -66,11 +66,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: state, locid, zip, year - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: analyze table loc_orc compute statistics for columns state @@ -106,22 +106,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: sq1:loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: state (type: string), locid (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -129,13 +129,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: state, locid, $f2 - Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(locid) keys: state (type: string), $f2 (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -151,7 +151,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int) Reduce Operator Tree: Group By Operator @@ -159,10 +159,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: state, $f2, $f2_0 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -767,30 +767,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), zip (type: bigint) outputColumnNames: state, zip - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), zip (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: state, zip - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out index b296280..f8fd963 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out @@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: src1 @@ -44,7 +46,7 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(value1, 16), compute_stats(key2, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,6 +110,39 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out index d4d70bc..05ca4bd 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out @@ -22,6 +22,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -65,6 +67,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -79,6 +96,39 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2452 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) @@ -132,6 +182,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -175,6 +227,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -189,6 +256,39 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2452 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) diff --git a/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out index c09764c..5ce2699 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16), compute_stats(c10, 16), compute_stats(c11, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -88,6 +109,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out index af21343..48c6d1e 100644 --- a/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out +++ b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out @@ -36,7 +36,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\",\"year\":\"true\"}} numFiles 2 numRows 3 rawDataSize 24 @@ -79,7 +79,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"year\":\"true\"}} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\",\"year\":\"true\"}} numFiles 2 numRows 3 rawDataSize 24 @@ -168,7 +168,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\"}} #### A masked pattern was here #### numFiles 2 numRows 3 @@ -191,46 +191,12 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select max(month) from calendar POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: calendar - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: month (type: int) - outputColumnNames: month - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(month) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -322,29 +288,29 @@ STAGE PLANS: Map Operator Tree: TableScan alias: calendar - Statistics: Num rows: 136 Data size: 547 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 136 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: month (type: int) outputColumnNames: month - Statistics: Num rows: 136 Data size: 547 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 136 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(month) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/constprog_dp.q.out b/ql/src/test/results/clientpositive/constprog_dp.q.out index 8cf301d..b0e4d95 100644 --- a/ql/src/test/results/clientpositive/constprog_dp.q.out +++ b/ql/src/test/results/clientpositive/constprog_dp.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -68,6 +103,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out index 00bdb4c..b783d8a 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out @@ -106,10 +106,12 @@ ON b.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 + Stage-2 depends on stages: Stage-1, Stage-6 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-5 is a root stage + Stage-8 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 + Stage-6 is a root stage STAGE PLANS: Stage: Stage-1 @@ -198,6 +200,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -212,7 +229,36 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-5 + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, string + Table: default.dest_co1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -314,8 +360,12 @@ ON b.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1, Stage-6 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 + Stage-6 is a root stage STAGE PLANS: Stage: Stage-1 @@ -351,6 +401,120 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co2 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, string + Table: default.dest_co2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: TableScan alias: m Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: NONE @@ -383,85 +547,24 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: - Demux Operator - Statistics: Num rows: 2956 Data size: 12099 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 - - Stage: Stage-2 - Stats-Aggr Operator + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_co2 SELECT b.key, d.val @@ -510,21 +613,23 @@ JOIN ON b.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-14 is a root stage - Stage-10 depends on stages: Stage-14 - Stage-9 depends on stages: Stage-10, Stage-11 , consists of Stage-12, Stage-13, Stage-2 - Stage-12 has a backup stage: Stage-2 - Stage-7 depends on stages: Stage-12 - Stage-0 depends on stages: Stage-2, Stage-7, Stage-8 - Stage-3 depends on stages: Stage-0 + Stage-15 is a root stage + Stage-11 depends on stages: Stage-15 + Stage-10 depends on stages: Stage-11, Stage-12 , consists of Stage-13, Stage-14, Stage-2 Stage-13 has a backup stage: Stage-2 Stage-8 depends on stages: Stage-13 + Stage-0 depends on stages: Stage-2, Stage-8, Stage-9 + Stage-3 depends on stages: Stage-0 + Stage-17 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2, Stage-8, Stage-9 + Stage-14 has a backup stage: Stage-2 + Stage-9 depends on stages: Stage-14 Stage-2 - Stage-15 is a root stage - Stage-11 depends on stages: Stage-15 + Stage-16 is a root stage + Stage-12 depends on stages: Stage-16 STAGE PLANS: - Stage: Stage-14 + Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_1:y @@ -547,7 +652,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -577,10 +682,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-9 + Stage: Stage-10 Conditional Operator - Stage: Stage-12 + Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME1 @@ -594,7 +699,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-7 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -618,6 +723,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -634,7 +754,36 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-13 + Stage: Stage-17 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, string + Table: default.dest_co3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -648,7 +797,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-8 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -672,6 +821,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -712,8 +876,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-15 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:$hdt$_1:m @@ -736,7 +915,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-11 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/cp_sel.q.out b/ql/src/test/results/clientpositive/cp_sel.q.out index 1778ccd..5259051 100644 --- a/ql/src/test/results/clientpositive/cp_sel.q.out +++ b/ql/src/test/results/clientpositive/cp_sel.q.out @@ -78,6 +78,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -109,6 +111,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testpartbucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -126,6 +144,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.testpartbucket + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table testpartbucket partition(ds,hr) select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/deleteAnalyze.q.out index 4382522..1f88831 100644 --- a/ql/src/test/results/clientpositive/deleteAnalyze.q.out +++ b/ql/src/test/results/clientpositive/deleteAnalyze.q.out @@ -48,6 +48,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"amount\":\"true\",\"id\":\"true\",\"item\":\"true\",\"sales_tax\":\"true\"}} numFiles 1 totalSize 578 #### A masked pattern was here #### @@ -71,7 +72,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@testdeci2 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -amount decimal(10,3) from deserializer +amount decimal(10,3) 12.123 123.123 0 2 from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"amount\":\"true\",\"id\":\"true\",\"item\":\"true\",\"sales_tax\":\"true\"}} PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 diff --git a/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out b/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out index d199574..b4efaf4 100644 --- a/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out +++ b/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out @@ -61,7 +61,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -130,7 +130,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -199,7 +199,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -245,7 +245,7 @@ STAGE PLANS: partcol1 1 partcol2 __HIVE_DEFAULT_PARTITION__ properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol diff --git a/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out b/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out index 2abb819..3714189 100644 --- a/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out +++ b/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out @@ -220,9 +220,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -282,6 +286,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -296,11 +315,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) @@ -325,6 +380,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -336,9 +406,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: SELECT dest1.* FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -402,6 +494,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -461,6 +554,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -476,6 +582,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -491,6 +610,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -514,6 +659,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out index 76d0b7b..fa1b3d3 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -71,3 +72,10 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.non_acid + diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out index 5d894ab..18b2eb0 100644 --- a/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out +++ b/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out @@ -595,7 +595,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -616,7 +616,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/explain_ddl.q.out b/ql/src/test/results/clientpositive/explain_ddl.q.out index e108e22..079b1f2 100644 --- a/ql/src/test/results/clientpositive/explain_ddl.q.out +++ b/ql/src/test/results/clientpositive/explain_ddl.q.out @@ -496,6 +496,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -519,6 +520,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.m1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -542,6 +569,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.m1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out index b212da9..323b1d5 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out @@ -102,7 +102,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -147,7 +147,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -215,7 +215,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -260,7 +260,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out index b5f4fee..1e4a999 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out @@ -119,7 +119,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -164,7 +164,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -209,7 +209,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -254,7 +254,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -296,12 +296,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_1d @@ -322,7 +322,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -367,7 +367,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -412,7 +412,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -457,7 +457,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -499,12 +499,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state @@ -545,7 +545,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -590,7 +590,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -635,7 +635,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -680,7 +680,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -748,7 +748,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -793,7 +793,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -838,7 +838,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -883,7 +883,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -925,12 +925,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: create table if not exists loc_orc_2d ( @@ -1026,7 +1026,7 @@ STAGE PLANS: year 2001 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1072,7 +1072,7 @@ STAGE PLANS: year 2002 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1118,7 +1118,7 @@ STAGE PLANS: year 2003 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1164,7 +1164,7 @@ STAGE PLANS: year 2000 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1256,7 +1256,7 @@ STAGE PLANS: year 2002 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1302,7 +1302,7 @@ STAGE PLANS: year 2003 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1348,7 +1348,7 @@ STAGE PLANS: year 2000 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1394,7 +1394,7 @@ STAGE PLANS: year 2001 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1486,7 +1486,7 @@ STAGE PLANS: year 2003 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1528,12 +1528,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_2d - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_2d @@ -1555,7 +1555,7 @@ STAGE PLANS: year 2001 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1601,7 +1601,7 @@ STAGE PLANS: year 2002 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1647,7 +1647,7 @@ STAGE PLANS: year 2003 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1693,7 +1693,7 @@ STAGE PLANS: year 2000 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1785,7 +1785,7 @@ STAGE PLANS: year 2002 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1831,7 +1831,7 @@ STAGE PLANS: year 2003 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1877,7 +1877,7 @@ STAGE PLANS: year 2000 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1923,7 +1923,7 @@ STAGE PLANS: year 2001 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -2015,7 +2015,7 @@ STAGE PLANS: year 2003 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -2057,11 +2057,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_2d - Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out index 8f9b636..884bfcd 100644 --- a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out @@ -89,7 +89,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -129,7 +129,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:f, $hdt$_1:m] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_1:f, $hdt$_2:m] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -226,7 +226,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -266,7 +266,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:g] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:g] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/groupby1.q.out b/ql/src/test/results/clientpositive/groupby1.q.out index 46e09dd..66e46a3 100644 --- a/ql/src/test/results/clientpositive/groupby1.q.out +++ b/ql/src/test/results/clientpositive/groupby1.q.out @@ -17,6 +17,9 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -78,6 +81,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -92,6 +105,57 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest_g1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), 16 (type: int), value (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby10.q.out b/ql/src/test/results/clientpositive/groupby10.q.out index 66832b0..2a00eb8 100644 --- a/ql/src/test/results/clientpositive/groupby10.q.out +++ b/ql/src/test/results/clientpositive/groupby10.q.out @@ -45,10 +45,16 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-12 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -119,6 +125,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -133,11 +149,69 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: int), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: key (type: int) @@ -156,7 +230,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -185,6 +259,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -196,9 +280,53 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key @@ -290,10 +418,16 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-12 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -364,6 +498,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -378,11 +522,69 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: int), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: key (type: int) @@ -401,7 +603,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -430,6 +632,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -441,9 +653,53 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key @@ -534,8 +790,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -574,6 +834,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0), avg(DISTINCT KEY._col1:1._col0) keys: KEY._col0 (type: int) @@ -592,6 +862,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -606,6 +886,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -616,9 +932,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key diff --git a/ql/src/test/results/clientpositive/groupby11.q.out b/ql/src/test/results/clientpositive/groupby11.q.out index 1d0e86a..412733e 100644 --- a/ql/src/test/results/clientpositive/groupby11.q.out +++ b/ql/src/test/results/clientpositive/groupby11.q.out @@ -33,10 +33,16 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-12 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -107,6 +113,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), '111' (type: string) + outputColumnNames: key, val1, val2, ds + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -123,11 +139,78 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), 16 (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: substr(value, 5) (type: string), key (type: string) sort order: ++ Map-reduce partition columns: substr(value, 5) (type: string) @@ -146,7 +229,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -175,6 +258,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), '111' (type: string) + outputColumnNames: key, val1, val2, ds + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -188,9 +281,62 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), 16 (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 partition(ds='111') SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value diff --git a/ql/src/test/results/clientpositive/groupby12.q.out b/ql/src/test/results/clientpositive/groupby12.q.out index 921fc92..3db6a8a 100644 --- a/ql/src/test/results/clientpositive/groupby12.q.out +++ b/ql/src/test/results/clientpositive/groupby12.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -54,6 +56,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -68,6 +80,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), 16 (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby1_limit.q.out b/ql/src/test/results/clientpositive/groupby1_limit.q.out index 78a49eb..56a8c02 100644 --- a/ql/src/test/results/clientpositive/groupby1_limit.q.out +++ b/ql/src/test/results/clientpositive/groupby1_limit.q.out @@ -17,6 +17,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -40,7 +42,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator @@ -66,7 +67,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: double) Reduce Operator Tree: Select Operator @@ -88,6 +88,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -102,6 +117,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key ORDER BY src.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby1_map.q.out b/ql/src/test/results/clientpositive/groupby1_map.q.out index cc985a5..a69a482 100644 --- a/ql/src/test/results/clientpositive/groupby1_map.q.out +++ b/ql/src/test/results/clientpositive/groupby1_map.q.out @@ -16,6 +16,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -59,6 +61,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,6 +90,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out index cc985a5..a69a482 100644 --- a/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out +++ b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out @@ -16,6 +16,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -59,6 +61,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,6 +90,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby1_map_skew.q.out b/ql/src/test/results/clientpositive/groupby1_map_skew.q.out index 116744a..bdbd7fb 100644 --- a/ql/src/test/results/clientpositive/groupby1_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby1_map_skew.q.out @@ -17,6 +17,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -84,6 +86,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -98,6 +115,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby1_noskew.q.out b/ql/src/test/results/clientpositive/groupby1_noskew.q.out index 98c0d3c..7aab2f3 100644 --- a/ql/src/test/results/clientpositive/groupby1_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby1_noskew.q.out @@ -16,6 +16,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -53,6 +55,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -67,6 +79,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest_g1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), 16 (type: int), value (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby2_map.q.out b/ql/src/test/results/clientpositive/groupby2_map.q.out index 0dcd810..b03af2d 100644 --- a/ql/src/test/results/clientpositive/groupby2_map.q.out +++ b/ql/src/test/results/clientpositive/groupby2_map.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +63,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +92,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out index 64477db..3508f6e 100644 --- a/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +63,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +92,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY @@ -120,6 +166,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -163,6 +211,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -177,6 +240,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby2_map_skew.q.out b/ql/src/test/results/clientpositive/groupby2_map_skew.q.out index 813ae5c..36b70bb 100644 --- a/ql/src/test/results/clientpositive/groupby2_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby2_map_skew.q.out @@ -19,6 +19,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -86,6 +88,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -100,6 +117,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby2_noskew.q.out b/ql/src/test/results/clientpositive/groupby2_noskew.q.out index 5192db3..1822d20 100644 --- a/ql/src/test/results/clientpositive/groupby2_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby2_noskew.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -54,6 +56,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -68,6 +80,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), 16 (type: int), c1 (type: int), c2 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out index 1c24213..6d44e3f 100644 --- a/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -55,6 +57,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -69,6 +81,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), 16 (type: int), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16), compute_stats(VALUE._col5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby3.q.out b/ql/src/test/results/clientpositive/groupby3.q.out index 2ebeae4..648afc9 100644 --- a/ql/src/test/results/clientpositive/groupby3.q.out +++ b/ql/src/test/results/clientpositive/groupby3.q.out @@ -37,6 +37,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -73,6 +75,7 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reduce Operator Tree: @@ -93,6 +96,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -107,6 +125,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4452 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby3_map.q.out b/ql/src/test/results/clientpositive/groupby3_map.q.out index 07c122e..9fe74de 100644 --- a/ql/src/test/results/clientpositive/groupby3_map.q.out +++ b/ql/src/test/results/clientpositive/groupby3_map.q.out @@ -36,6 +36,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -77,6 +78,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +112,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out index a4501f7..02d7bfd 100644 --- a/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -81,6 +82,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16), compute_stats(c10, 16), compute_stats(c11, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5412 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5412 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -95,6 +116,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby3_map_skew.q.out b/ql/src/test/results/clientpositive/groupby3_map_skew.q.out index e02bdeb..621339d 100644 --- a/ql/src/test/results/clientpositive/groupby3_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby3_map_skew.q.out @@ -37,6 +37,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -100,6 +101,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -114,6 +135,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby3_noskew.q.out b/ql/src/test/results/clientpositive/groupby3_noskew.q.out index 624fd2d..8771ecd 100644 --- a/ql/src/test/results/clientpositive/groupby3_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby3_noskew.q.out @@ -36,6 +36,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -70,6 +71,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -84,6 +101,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out index a1d403d..aa282aa 100644 --- a/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16), compute_stats(c10, 16), compute_stats(c11, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5448 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5448 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -88,6 +105,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby4.q.out b/ql/src/test/results/clientpositive/groupby4.q.out index 3f77e47..194691f 100644 --- a/ql/src/test/results/clientpositive/groupby4.q.out +++ b/ql/src/test/results/clientpositive/groupby4.q.out @@ -19,6 +19,9 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -72,6 +75,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -86,6 +99,57 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string), 16 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby4_map.q.out b/ql/src/test/results/clientpositive/groupby4_map.q.out index a8a7fbe..a506770 100644 --- a/ql/src/test/results/clientpositive/groupby4_map.q.out +++ b/ql/src/test/results/clientpositive/groupby4_map.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -53,6 +54,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,6 +88,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby4_map_skew.q.out b/ql/src/test/results/clientpositive/groupby4_map_skew.q.out index 3f38895..7048766 100644 --- a/ql/src/test/results/clientpositive/groupby4_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby4_map_skew.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -53,6 +54,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,6 +88,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby4_noskew.q.out b/ql/src/test/results/clientpositive/groupby4_noskew.q.out index c7db0d7..5d4cfae 100644 --- a/ql/src/test/results/clientpositive/groupby4_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby4_noskew.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -49,6 +51,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -63,6 +75,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string), 16 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby5.q.out b/ql/src/test/results/clientpositive/groupby5.q.out index 9bf01ee..a65baa7 100644 --- a/ql/src/test/results/clientpositive/groupby5.q.out +++ b/ql/src/test/results/clientpositive/groupby5.q.out @@ -23,6 +23,9 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -84,6 +87,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -98,6 +111,57 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) FROM src diff --git a/ql/src/test/results/clientpositive/groupby5_map.q.out b/ql/src/test/results/clientpositive/groupby5_map.q.out index 5fbd3d7..87b1ef8 100644 --- a/ql/src/test/results/clientpositive/groupby5_map.q.out +++ b/ql/src/test/results/clientpositive/groupby5_map.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -55,6 +56,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +90,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby5_map_skew.q.out b/ql/src/test/results/clientpositive/groupby5_map_skew.q.out index 60b010b..6c21d2a 100644 --- a/ql/src/test/results/clientpositive/groupby5_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby5_map_skew.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -55,6 +56,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +90,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby5_noskew.q.out b/ql/src/test/results/clientpositive/groupby5_noskew.q.out index 612a0f6..d71f71c 100644 --- a/ql/src/test/results/clientpositive/groupby5_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby5_noskew.q.out @@ -22,6 +22,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -59,6 +61,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,6 +85,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) FROM src diff --git a/ql/src/test/results/clientpositive/groupby6.q.out b/ql/src/test/results/clientpositive/groupby6.q.out index b790224..411c93f 100644 --- a/ql/src/test/results/clientpositive/groupby6.q.out +++ b/ql/src/test/results/clientpositive/groupby6.q.out @@ -19,6 +19,9 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -72,6 +75,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -86,6 +99,57 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string), 16 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby6_map.q.out b/ql/src/test/results/clientpositive/groupby6_map.q.out index 4ba3772..3432642 100644 --- a/ql/src/test/results/clientpositive/groupby6_map.q.out +++ b/ql/src/test/results/clientpositive/groupby6_map.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -54,6 +56,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -68,6 +85,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby6_map_skew.q.out b/ql/src/test/results/clientpositive/groupby6_map_skew.q.out index 5141c0d..69e4179 100644 --- a/ql/src/test/results/clientpositive/groupby6_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby6_map_skew.q.out @@ -19,6 +19,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -91,6 +108,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby6_noskew.q.out b/ql/src/test/results/clientpositive/groupby6_noskew.q.out index fd796c7..58808a6 100644 --- a/ql/src/test/results/clientpositive/groupby6_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby6_noskew.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -49,6 +51,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -63,6 +75,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string), 16 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby7_map.q.out b/ql/src/test/results/clientpositive/groupby7_map.q.out index 0ef29cd..901132f 100644 --- a/ql/src/test/results/clientpositive/groupby7_map.q.out +++ b/ql/src/test/results/clientpositive/groupby7_map.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -90,6 +94,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,11 +123,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) @@ -133,6 +188,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -144,9 +214,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out index 7c3b033..b9857d1 100644 --- a/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out @@ -28,8 +28,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -69,6 +73,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -87,6 +106,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -101,6 +135,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -111,9 +181,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby7_map_skew.q.out b/ql/src/test/results/clientpositive/groupby7_map_skew.q.out index 4bfa52e..0fd47df 100644 --- a/ql/src/test/results/clientpositive/groupby7_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby7_map_skew.q.out @@ -29,10 +29,14 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-2 @@ -116,6 +120,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -130,11 +149,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) @@ -154,7 +209,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -183,6 +238,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -194,9 +264,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby7_noskew.q.out b/ql/src/test/results/clientpositive/groupby7_noskew.q.out index 6178f58..06913d9 100644 --- a/ql/src/test/results/clientpositive/groupby7_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby7_noskew.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -78,6 +82,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -92,11 +106,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) @@ -121,6 +171,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -132,9 +192,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out index f38c428..c88f70b 100644 --- a/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out @@ -29,9 +29,13 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-2 @@ -108,6 +112,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -122,11 +136,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -152,6 +202,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -163,9 +223,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 diff --git a/ql/src/test/results/clientpositive/groupby8.q.out b/ql/src/test/results/clientpositive/groupby8.q.out index 1856a92..695c3c8 100644 --- a/ql/src/test/results/clientpositive/groupby8.q.out +++ b/ql/src/test/results/clientpositive/groupby8.q.out @@ -29,10 +29,16 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-12 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -103,6 +109,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -117,11 +133,69 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: string), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: key (type: string) @@ -140,7 +214,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -169,6 +243,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -180,9 +264,53 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key @@ -850,10 +978,16 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-12 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -924,6 +1058,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -938,11 +1082,69 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: string), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: key (type: string) @@ -961,7 +1163,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -990,6 +1192,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -1001,9 +1213,53 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby8_map.q.out b/ql/src/test/results/clientpositive/groupby8_map.q.out index f683a8b..c887dd5 100644 --- a/ql/src/test/results/clientpositive/groupby8_map.q.out +++ b/ql/src/test/results/clientpositive/groupby8_map.q.out @@ -28,8 +28,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -68,6 +72,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) @@ -86,6 +105,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -100,6 +134,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -110,9 +180,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby8_map_skew.q.out b/ql/src/test/results/clientpositive/groupby8_map_skew.q.out index 5e60d3e..de617f3 100644 --- a/ql/src/test/results/clientpositive/groupby8_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby8_map_skew.q.out @@ -29,10 +29,14 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-2 @@ -115,6 +119,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -129,11 +148,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) @@ -152,7 +207,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -181,6 +236,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -192,9 +262,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby8_noskew.q.out b/ql/src/test/results/clientpositive/groupby8_noskew.q.out index f683a8b..1f98658 100644 --- a/ql/src/test/results/clientpositive/groupby8_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby8_noskew.q.out @@ -28,8 +28,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -68,6 +72,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) @@ -86,6 +100,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -100,6 +124,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -110,9 +170,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby9.q.out b/ql/src/test/results/clientpositive/groupby9.q.out index 15ea185..4e6e588 100644 --- a/ql/src/test/results/clientpositive/groupby9.q.out +++ b/ql/src/test/results/clientpositive/groupby9.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -89,6 +93,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -103,11 +122,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -131,6 +186,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -142,9 +212,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value @@ -812,9 +904,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -873,6 +969,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -887,11 +998,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -915,6 +1062,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -926,9 +1088,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key @@ -1596,9 +1780,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -1657,6 +1845,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1671,11 +1874,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -1699,6 +1938,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -1710,9 +1964,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value @@ -2380,9 +2656,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -2442,6 +2722,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -2456,11 +2751,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -2485,6 +2816,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -2496,9 +2842,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value @@ -3166,9 +3534,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -3227,6 +3599,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -3241,11 +3628,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -3269,6 +3692,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -3280,9 +3718,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key diff --git a/ql/src/test/results/clientpositive/groupby_cube1.q.out b/ql/src/test/results/clientpositive/groupby_cube1.q.out index fd70a2c..1b14461 100644 --- a/ql/src/test/results/clientpositive/groupby_cube1.q.out +++ b/ql/src/test/results/clientpositive/groupby_cube1.q.out @@ -552,10 +552,14 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-2 @@ -640,6 +644,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -654,11 +673,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) @@ -678,7 +733,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -708,6 +763,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -719,9 +789,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 - Stage: Stage-7 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM T1 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube diff --git a/ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out b/ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out index 1e13288..f2ad1f6 100644 --- a/ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out +++ b/ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out @@ -34,9 +34,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -92,6 +96,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -106,11 +125,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.t1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.t2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) @@ -130,6 +185,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -141,6 +211,28 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/groupby_map_ppr.q.out b/ql/src/test/results/clientpositive/groupby_map_ppr.q.out index 9be41df..90c3dba 100644 --- a/ql/src/test/results/clientpositive/groupby_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/groupby_map_ppr.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -198,6 +200,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -231,6 +260,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) diff --git a/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out index 606ab4e..6e3336e 100644 --- a/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -198,6 +200,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct,struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -231,6 +260,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct,struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct,struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) diff --git a/ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out b/ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out index 5f02b04..7ed6fd8 100644 --- a/ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -89,6 +93,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -103,11 +122,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: double) @@ -131,6 +186,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -142,9 +212,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table dest1 select key, count(distinct value) group by key insert overwrite table dest2 select key+key, count(distinct value) group by key+key diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out index 256784d..00fd1b4 100644 --- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out @@ -54,10 +54,16 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 Stage-4 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9 + Stage-12 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 Stage-0 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -97,6 +103,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -118,6 +139,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -139,6 +175,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -153,6 +204,49 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g3 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g4 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Move Operator tables: @@ -163,9 +257,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -176,9 +292,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Stage: Stage-6 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) @@ -276,16 +414,26 @@ STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 Stage-6 depends on stages: Stage-2 + Stage-18 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-11, Stage-14, Stage-15, Stage-16, Stage-17 + Stage-19 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-11, Stage-14, Stage-15, Stage-16, Stage-17 + Stage-20 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-11, Stage-14, Stage-15, Stage-16, Stage-17 + Stage-21 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-11, Stage-14, Stage-15, Stage-16, Stage-17 + Stage-22 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-11, Stage-14, Stage-15, Stage-16, Stage-17 + Stage-7 depends on stages: Stage-5 Stage-0 depends on stages: Stage-5 - Stage-7 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-5 - Stage-8 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-0 Stage-9 depends on stages: Stage-5 - Stage-10 depends on stages: Stage-9 - Stage-3 depends on stages: Stage-10 - Stage-11 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-9 - Stage-12 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-5 + Stage-10 depends on stages: Stage-1 + Stage-11 depends on stages: Stage-5 + Stage-12 depends on stages: Stage-5 + Stage-13 depends on stages: Stage-12 + Stage-3 depends on stages: Stage-13 + Stage-14 depends on stages: Stage-3 + Stage-15 depends on stages: Stage-13 + Stage-4 depends on stages: Stage-12 + Stage-16 depends on stages: Stage-4 + Stage-17 depends on stages: Stage-12 STAGE PLANS: Stage: Stage-5 @@ -335,6 +483,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -356,6 +519,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -377,6 +555,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -391,6 +584,63 @@ STAGE PLANS: Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-18 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-19 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g3 + + Stage: Stage-20 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g4 + + Stage: Stage-21 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_h2 + + Stage: Stage-22 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_h3 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Move Operator tables: @@ -401,9 +651,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 - Stage: Stage-7 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -414,10 +686,32 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Stage: Stage-8 + Stage: Stage-10 Stats-Aggr Operator - Stage: Stage-9 + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -467,8 +761,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-10 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan @@ -476,7 +785,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: string), _col3 (type: double), _col4 (type: bigint) Reduce Operator Tree: Select Operator @@ -498,6 +806,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Move Operator @@ -509,9 +832,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h2 - Stage: Stage-11 + Stage: Stage-14 Stats-Aggr Operator + Stage: Stage-15 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 Move Operator tables: @@ -522,9 +867,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h3 - Stage: Stage-12 + Stage: Stage-16 Stats-Aggr Operator + Stage: Stage-17 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out index 012b211..5151068 100644 --- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out @@ -28,8 +28,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -71,6 +75,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE @@ -92,6 +111,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -106,6 +140,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_g2 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, int + Table: default.dest_g3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -116,9 +186,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out index e41d9ef..c9b6cec 100644 --- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out @@ -40,8 +40,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -83,6 +87,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE @@ -104,6 +123,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -118,6 +152,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -128,9 +198,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table e1 select key, count(*) @@ -206,8 +298,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -249,6 +345,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -270,6 +381,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -284,6 +410,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -294,9 +456,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table e1 select value, count(*) @@ -372,8 +556,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -415,6 +603,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE @@ -436,6 +639,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -450,6 +668,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -460,9 +714,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table e1 select key, count(*) @@ -538,8 +814,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -581,6 +861,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -602,6 +897,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -616,6 +926,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -626,9 +972,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table e1 select value, count(*) diff --git a/ql/src/test/results/clientpositive/groupby_position.q.out b/ql/src/test/results/clientpositive/groupby_position.q.out index 3ded69c..e76ddc7 100644 --- a/ql/src/test/results/clientpositive/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/groupby_position.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -87,6 +91,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable1 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -101,11 +120,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.testtable1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.testtable2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -129,6 +184,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -140,9 +210,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2 @@ -218,9 +310,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -277,6 +373,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable1 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -291,11 +402,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.testtable1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.testtable2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -319,6 +466,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -330,9 +492,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1 diff --git a/ql/src/test/results/clientpositive/groupby_ppr.q.out b/ql/src/test/results/clientpositive/groupby_ppr.q.out index 8a75c2e..f9e390c 100644 --- a/ql/src/test/results/clientpositive/groupby_ppr.q.out +++ b/ql/src/test/results/clientpositive/groupby_ppr.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -191,6 +193,28 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2 + columns.types string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -224,6 +248,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), 16 (type: int), c1 (type: int), c2 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2 + columns.types string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2 + columns.types string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) diff --git a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out index 68d91eb..8f85fae 100644 --- a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -191,6 +193,28 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -224,6 +248,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), 16 (type: int), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16), compute_stats(VALUE._col5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) @@ -285,6 +386,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -431,7 +534,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"c1":"true","c2":"true","c3":"true","c4":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,c1,c2,c3,c4 @@ -452,6 +555,28 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -462,7 +587,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"c1":"true","c2":"true","c3":"true","c4":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,c1,c2,c3,c4 @@ -485,6 +610,87 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), 16 (type: int), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16), compute_stats(VALUE._col5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) diff --git a/ql/src/test/results/clientpositive/groupby_rollup1.q.out b/ql/src/test/results/clientpositive/groupby_rollup1.q.out index 5fd011e..b78d062 100644 --- a/ql/src/test/results/clientpositive/groupby_rollup1.q.out +++ b/ql/src/test/results/clientpositive/groupby_rollup1.q.out @@ -397,10 +397,14 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-2 @@ -485,6 +489,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -499,11 +518,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) @@ -523,7 +578,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -553,6 +608,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -564,9 +634,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 - Stage: Stage-7 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM T1 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with rollup INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with rollup diff --git a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out index 92babd7..a891c69 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -106,6 +107,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -161,6 +178,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -203,6 +249,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -428,6 +482,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -555,6 +611,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -588,6 +671,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, string, int + Table: default.outputtbl2 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY @@ -629,6 +789,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -666,7 +827,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -687,6 +848,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -742,6 +919,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -761,7 +967,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -784,6 +990,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -798,7 +1012,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -828,7 +1042,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -849,7 +1063,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -885,7 +1099,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -915,7 +1129,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -936,7 +1150,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1003,6 +1217,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1040,7 +1255,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1061,6 +1276,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1116,6 +1347,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1135,7 +1395,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1158,6 +1418,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1172,7 +1440,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1202,7 +1470,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1223,7 +1491,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1259,7 +1527,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1289,7 +1557,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1310,7 +1578,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1385,6 +1653,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1443,6 +1712,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1498,6 +1783,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1540,6 +1854,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1766,6 +2088,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -1893,6 +2217,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1926,38 +2277,115 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl4 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl4 -POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl4 + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl4 + Path -> Partition: #### A masked pattern was here #### -1 1 11 1 -2 1 12 1 -3 1 13 1 -7 1 17 1 -8 1 18 1 -8 1 28 1 -PREHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 -PREHOOK: type: QUERY + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + +PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 +SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@outputtbl4 +POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 +SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@outputtbl4 +POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] +POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM outputTbl4 +PREHOOK: type: QUERY +PREHOOK: Input: default@outputtbl4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM outputTbl4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@outputtbl4 +#### A masked pattern was here #### +1 1 11 1 +2 1 12 1 +3 1 13 1 +7 1 17 1 +8 1 18 1 +8 1 28 1 +PREHOOK: query: EXPLAIN EXTENDED +INSERT OVERWRITE TABLE outputTbl3 +SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 +PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 @@ -1966,6 +2394,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -2072,7 +2502,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -2093,6 +2523,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2103,7 +2560,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -2126,6 +2583,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 PREHOOK: type: QUERY @@ -2168,6 +2702,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -2284,7 +2820,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2305,6 +2841,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2315,7 +2878,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2338,6 +2901,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -2389,6 +3029,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -2428,7 +3069,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2449,6 +3090,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -2480,7 +3137,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2501,6 +3158,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2556,6 +3229,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1, null-subquery2:$hdt$_0-subquery2:$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -2575,7 +3277,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2598,6 +3300,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -2612,7 +3322,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2642,7 +3352,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2663,7 +3373,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2699,7 +3409,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2729,7 +3439,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2750,7 +3460,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2839,6 +3549,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -2990,7 +3701,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3011,6 +3722,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false TableScan GatherStats: false Union @@ -3030,7 +3757,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3051,6 +3778,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3129,6 +3872,35 @@ STAGE PLANS: Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1] #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -3148,7 +3920,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3171,6 +3943,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -3185,7 +3965,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3215,7 +3995,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3236,7 +4016,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3272,7 +4052,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3302,7 +4082,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3323,7 +4103,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3409,6 +4189,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -3548,7 +4330,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3569,6 +4351,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3579,7 +4388,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3602,6 +4411,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -3957,6 +4843,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -3995,7 +4883,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4018,7 +4906,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4064,7 +4952,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -4085,6 +4973,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -4095,7 +5010,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -4111,12 +5026,89 @@ STAGE PLANS: serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Stats-Aggr Operator + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key @@ -4157,6 +5149,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -4194,7 +5187,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4215,6 +5208,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4224,7 +5233,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4247,7 +5256,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4270,6 +5279,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4289,7 +5327,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4312,6 +5350,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -4326,7 +5372,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4356,7 +5402,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4377,7 +5423,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4413,7 +5459,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4443,7 +5489,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4464,7 +5510,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4542,6 +5588,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -4600,6 +5647,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key1, key2, key3, key4, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(key4, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4609,7 +5672,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4632,7 +5695,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4655,6 +5718,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4697,6 +5789,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, key4, cnt + Column Types: int, int, string, int, int + Table: default.outputtbl5 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -4926,6 +6026,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -4963,7 +6064,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4984,6 +6085,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4993,7 +6110,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5016,7 +6133,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5039,6 +6156,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5058,7 +6204,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5081,6 +6227,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -5095,7 +6249,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5125,7 +6279,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5146,7 +6300,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5182,7 +6336,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5212,7 +6366,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5233,7 +6387,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5317,6 +6471,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -5354,7 +6509,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5375,6 +6530,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5384,7 +6555,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5407,7 +6578,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5430,6 +6601,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5449,7 +6649,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5472,6 +6672,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -5486,7 +6694,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5516,7 +6724,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5537,7 +6745,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5573,7 +6781,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5603,7 +6811,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5624,7 +6832,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5720,8 +6928,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -5769,6 +6981,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -5788,6 +7015,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -5802,6 +7044,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -5812,9 +7090,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val @@ -5875,8 +7175,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -5927,6 +7231,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -5946,6 +7265,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -5960,6 +7294,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -5970,9 +7340,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val diff --git a/ql/src/test/results/clientpositive/groupby_sort_2.q.out b/ql/src/test/results/clientpositive/groupby_sort_2.q.out index de6bf14..e49e956 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_2.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_2.q.out @@ -46,6 +46,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -90,6 +92,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,6 +121,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: val, cnt + Column Types: string, int + Table: default.outputtbl1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT val, count(1) FROM T1 GROUP BY val PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby_sort_3.q.out b/ql/src/test/results/clientpositive/groupby_sort_3.q.out index da1db8c..d295a05 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_3.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_3.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -81,6 +82,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -104,6 +131,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -183,6 +217,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -216,6 +251,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -239,6 +300,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.outputtbl2 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/groupby_sort_4.q.out b/ql/src/test/results/clientpositive/groupby_sort_4.q.out index ae2ae66..bed68f6 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_4.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_4.q.out @@ -46,6 +46,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -90,6 +92,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,6 +121,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.outputtbl1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY @@ -149,6 +195,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -192,6 +240,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -206,6 +269,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby_sort_5.q.out b/ql/src/test/results/clientpositive/groupby_sort_5.q.out index 40b9769..4b0e491 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_5.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_5.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -81,6 +82,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -104,6 +131,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -211,6 +245,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -244,6 +279,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -267,6 +328,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -380,6 +448,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -424,6 +494,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -438,6 +523,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.outputtbl2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby_sort_6.q.out b/ql/src/test/results/clientpositive/groupby_sort_6.q.out index 80baa66..375f736 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_6.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_6.q.out @@ -26,6 +26,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -102,6 +104,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -135,6 +164,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key PREHOOK: type: QUERY @@ -176,6 +282,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -231,7 +339,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -252,6 +360,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -262,7 +397,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -285,6 +420,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key PREHOOK: type: QUERY @@ -317,6 +529,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -419,7 +633,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -440,6 +654,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -450,7 +691,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -473,6 +714,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '2' GROUP BY key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby_sort_7.q.out b/ql/src/test/results/clientpositive/groupby_sort_7.q.out index 21b0a37..e7c9a95 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_7.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_7.q.out @@ -51,6 +51,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -84,6 +85,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -107,6 +134,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out index 6e09753..b22feb8 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -106,6 +107,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -161,6 +178,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -203,6 +249,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -429,6 +483,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -624,6 +680,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -657,6 +740,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, string, int + Table: default.outputtbl2 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY @@ -698,6 +858,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -735,7 +896,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -756,6 +917,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -811,6 +988,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -830,7 +1036,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -853,6 +1059,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -867,7 +1081,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -897,7 +1111,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -918,7 +1132,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -954,7 +1168,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -984,7 +1198,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1005,7 +1219,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1072,6 +1286,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1109,7 +1324,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1130,6 +1345,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1185,6 +1416,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1204,7 +1464,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1227,6 +1487,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1241,7 +1509,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1271,7 +1539,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1292,7 +1560,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1328,7 +1596,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1358,7 +1626,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1379,7 +1647,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1454,6 +1722,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1512,6 +1781,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1567,6 +1852,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1609,6 +1923,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1836,6 +2158,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -2031,6 +2355,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2064,32 +2415,109 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl4 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl4 -POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl4 + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl4 + Path -> Partition: #### A masked pattern was here #### -1 1 11 1 -2 1 12 1 -3 1 13 1 -7 1 17 1 + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + +PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 +SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@outputtbl4 +POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 +SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@outputtbl4 +POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] +POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM outputTbl4 +PREHOOK: type: QUERY +PREHOOK: Input: default@outputtbl4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM outputTbl4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@outputtbl4 +#### A masked pattern was here #### +1 1 11 1 +2 1 12 1 +3 1 13 1 +7 1 17 1 8 1 18 1 8 1 28 1 PREHOOK: query: EXPLAIN EXTENDED @@ -2105,6 +2533,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -2279,7 +2709,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -2300,6 +2730,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2310,7 +2767,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -2333,6 +2790,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 PREHOOK: type: QUERY @@ -2376,6 +2910,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -2560,7 +3096,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2581,6 +3117,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2591,7 +3154,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2614,6 +3177,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT cast(key + key as string), sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -2665,6 +3305,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -2704,7 +3345,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2725,6 +3366,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -2756,7 +3413,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2777,6 +3434,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2832,6 +3505,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1, null-subquery2:$hdt$_0-subquery2:$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -2851,7 +3553,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2874,6 +3576,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -2888,7 +3598,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2918,7 +3628,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2939,7 +3649,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2975,7 +3685,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3005,7 +3715,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3026,7 +3736,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3116,6 +3826,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -3335,7 +4046,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3356,6 +4067,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false TableScan GatherStats: false Union @@ -3375,7 +4102,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3396,6 +4123,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3474,6 +4217,35 @@ STAGE PLANS: Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1] #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -3493,7 +4265,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3516,6 +4288,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -3530,7 +4310,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3560,7 +4340,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3581,7 +4361,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3617,7 +4397,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3647,7 +4427,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3668,7 +4448,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3754,6 +4534,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -3893,7 +4675,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3914,6 +4696,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3924,7 +4733,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3947,6 +4756,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -4372,6 +5258,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -4410,7 +5298,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4433,7 +5321,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4547,7 +5435,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -4568,6 +5456,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -4578,7 +5493,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -4594,12 +5509,89 @@ STAGE PLANS: serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-3 - Stats-Aggr Operator + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key @@ -4640,6 +5632,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -4677,7 +5670,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4698,6 +5691,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4707,7 +5716,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4730,7 +5739,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4753,6 +5762,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4772,7 +5810,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4795,6 +5833,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -4809,7 +5855,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4839,7 +5885,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4860,7 +5906,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4896,7 +5942,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4926,7 +5972,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4947,7 +5993,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5025,6 +6071,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -5083,6 +6130,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key1, key2, key3, key4, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(key4, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5092,7 +6155,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5115,7 +6178,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5138,6 +6201,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5180,6 +6272,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, key4, cnt + Column Types: int, int, string, int, int + Table: default.outputtbl5 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -5409,6 +6509,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -5446,7 +6547,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5467,6 +6568,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5476,7 +6593,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5499,7 +6616,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5522,6 +6639,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5541,7 +6687,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5564,6 +6710,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -5578,7 +6732,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5608,7 +6762,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5629,7 +6783,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5665,7 +6819,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5695,7 +6849,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5716,7 +6870,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5800,6 +6954,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -5837,7 +6992,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5858,6 +7013,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5867,7 +7038,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5890,7 +7061,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5913,6 +7084,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5932,7 +7132,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5955,6 +7155,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -5969,7 +7177,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5999,7 +7207,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6020,7 +7228,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6056,7 +7264,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6086,7 +7294,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6107,7 +7315,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6204,8 +7412,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -6253,6 +7465,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -6296,6 +7523,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -6310,6 +7552,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -6320,9 +7598,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val @@ -6384,8 +7684,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -6436,6 +7740,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -6479,6 +7798,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -6493,6 +7827,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -6503,9 +7873,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val diff --git a/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out b/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out index 5b94c0e..fcf6a08 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -81,6 +82,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -104,6 +131,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out b/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out index c4b9dc4..be9f24a 100644 --- a/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out +++ b/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -56,6 +57,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.implicit_cast_during_insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: c1, c2, p1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16) + keys: p1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -72,6 +94,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.implicit_cast_during_insert + PREHOOK: query: insert overwrite table implicit_cast_during_insert partition (p1) select key, value, key key1 from (select * from src where key in (0,1)) q distribute by key1 sort by key1 diff --git a/ql/src/test/results/clientpositive/index_auto_update.q.out b/ql/src/test/results/clientpositive/index_auto_update.q.out index e7bc069..6f90f5d 100644 --- a/ql/src/test/results/clientpositive/index_auto_update.q.out +++ b/ql/src/test/results/clientpositive/index_auto_update.q.out @@ -46,6 +46,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-0 Stage-1 depends on stages: Stage-2 Stage-3 depends on stages: Stage-1 + Stage-13 depends on stages: Stage-3, Stage-4, Stage-5, Stage-7 Stage-4 depends on stages: Stage-2 Stage-5 depends on stages: Stage-2 Stage-7 depends on stages: Stage-0 @@ -72,6 +73,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.temp + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-12 Conditional Operator @@ -137,6 +164,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: string, string + Table: default.temp + Stage: Stage-4 Stage: Stage-5 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out index 1aea388..9ac873e 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out @@ -40,7 +40,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -91,7 +91,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -142,7 +142,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -193,7 +193,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -244,7 +244,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 4970 @@ -295,7 +295,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 2654 rawDataSize 28466 @@ -346,7 +346,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 2654 rawDataSize 28466 @@ -397,7 +397,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -448,7 +448,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -499,7 +499,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -550,7 +550,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -601,7 +601,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -652,7 +652,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -703,7 +703,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -754,7 +754,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -805,7 +805,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -856,7 +856,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 6 rawDataSize 18 @@ -907,7 +907,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 6 rawDataSize 18 @@ -958,7 +958,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 618 rawDataSize 2964 @@ -1009,7 +1009,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 5 rawDataSize 19 @@ -1060,7 +1060,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1111,7 +1111,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1162,7 +1162,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1213,7 +1213,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1264,7 +1264,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 3582 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out index 52ebe5a..2e996b9 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out @@ -40,7 +40,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -62,8 +62,6 @@ SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key POSTHOOK: type: QUERY @@ -93,7 +91,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -105,8 +103,8 @@ SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No -Num Buckets: 1 -Bucket Columns: [key] -Sort Columns: [Order(col:key, order:1)] +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] Storage Desc Params: serialization.format 1 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out index d62d0b8..d238545 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out @@ -58,7 +58,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 4812 @@ -98,7 +98,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 4812 @@ -161,7 +161,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1173 @@ -201,7 +201,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1173 @@ -266,7 +266,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 155 rawDataSize 586 @@ -306,7 +306,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 154 rawDataSize 591 @@ -428,9 +428,11 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -474,6 +476,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.test_table + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -500,6 +518,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table + Stage: Stage-3 Merge File Operator Map Operator Tree: @@ -520,6 +545,35 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) SELECT key, value, IF (key % 100 == 0, '11', '12') FROM (SELECT key, COUNT(*) AS value FROM srcpart @@ -568,7 +622,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 4 rawDataSize 14 @@ -608,7 +662,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 305 rawDataSize 1163 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out index 3e29664..1f1f010 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -68,6 +70,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), '1' (type: string) + outputColumnNames: key, value, agg, part + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(agg, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -84,6 +102,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, agg + Column Types: string, string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: SELECT key, value, count(1) FROM src GROUP BY ROLLUP (key, value) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -747,7 +801,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 619 rawDataSize 6309 @@ -1429,7 +1483,7 @@ Database: default Table: test_table_out_2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 619 rawDataSize 7547 @@ -1456,6 +1510,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -1500,6 +1556,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), '1' (type: string) + outputColumnNames: key, value, agg, part + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(agg, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1516,6 +1588,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, agg + Column Types: string, string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE PREHOOK: type: QUERY @@ -1552,7 +1660,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 928 rawDataSize 9954 @@ -1607,7 +1715,7 @@ Database: default Table: test_table_out_2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 928 rawDataSize 11810 @@ -1634,6 +1742,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -1678,6 +1788,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), '1' (type: string) + outputColumnNames: key, value, agg, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(agg, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1694,6 +1820,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, agg + Column Types: string, string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value) PREHOOK: type: QUERY @@ -1730,7 +1892,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 618 rawDataSize 6054 @@ -1785,7 +1947,7 @@ Database: default Table: test_table_out_2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 618 rawDataSize 7290 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out index 98a2f5f..c8d1492 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out @@ -46,7 +46,7 @@ Database: default Table: list_bucketing_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 309 rawDataSize 1482 @@ -116,7 +116,7 @@ Database: default Table: list_bucketing_table2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out index 32edd73..8170ea6 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out @@ -58,6 +58,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -83,6 +84,7 @@ STAGE PLANS: expressions: _col0 (type: string), UDFToString(_col1) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +<<<<<<< HEAD File Output Operator compressed: false Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -91,6 +93,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out +======= + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string) + outputColumnNames: key, value, part + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +>>>>>>> out Stage: Stage-7 Conditional Operator @@ -116,6 +159,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -180,7 +230,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -218,6 +268,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -273,6 +325,7 @@ STAGE PLANS: expressions: _col0 (type: string), UDFToString(_col1) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +<<<<<<< HEAD File Output Operator compressed: false Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE @@ -281,6 +334,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out +======= + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string) + outputColumnNames: key, value, part + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +>>>>>>> out Stage: Stage-0 Move Operator @@ -297,6 +373,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT a.key, a.value FROM ( SELECT key, cast(count(*) AS STRING) AS value FROM test_table1 GROUP BY key @@ -372,6 +484,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -403,6 +516,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, part + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -428,6 +569,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -494,7 +642,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -523,6 +671,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -557,6 +707,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 +<<<<<<< HEAD Select Operator expressions: _col0 (type: string), UDFToString(_col1) (type: string) outputColumnNames: _col0, _col1 @@ -567,6 +718,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out +======= + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), '1' (type: string) + outputColumnNames: key, value, part + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +>>>>>>> out Stage: Stage-0 Move Operator @@ -583,6 +757,38 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key GROUP BY b.value @@ -621,7 +827,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 2728 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out index bf77d4c..bf4daf2 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out @@ -40,7 +40,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -91,7 +91,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1028 rawDataSize 10968 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out index 59b20fe..114fcfe 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out @@ -46,7 +46,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -85,7 +85,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -142,7 +142,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -181,7 +181,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 2718 @@ -238,7 +238,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -277,7 +277,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -334,7 +334,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -373,7 +373,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 2690 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out index 0c61fe0..c353a0f 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out @@ -26,6 +26,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -56,6 +58,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-04-08' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,6 +91,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), _col1 (type: string) + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-04-08' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-04-08' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) SELECT key2, value, cast(hr as int) FROM (SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 3 % 2) as hr @@ -123,7 +177,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 670 rawDataSize 5888 @@ -163,7 +217,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 330 rawDataSize 2924 diff --git a/ql/src/test/results/clientpositive/innerjoin.q.out b/ql/src/test/results/clientpositive/innerjoin.q.out index 4a7649d..23d3777 100644 --- a/ql/src/test/results/clientpositive/innerjoin.q.out +++ b/ql/src/test/results/clientpositive/innerjoin.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +77,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +106,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 INNER JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input11.q.out b/ql/src/test/results/clientpositive/input11.q.out index bb22ee8..9543c65 100644 --- a/ql/src/test/results/clientpositive/input11.q.out +++ b/ql/src/test/results/clientpositive/input11.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input11_limit.q.out b/ql/src/test/results/clientpositive/input11_limit.q.out index 597554e..4a1b77a 100644 --- a/ql/src/test/results/clientpositive/input11_limit.q.out +++ b/ql/src/test/results/clientpositive/input11_limit.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -39,7 +41,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -61,6 +62,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +91,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 100 LIMIT 10 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input12.q.out b/ql/src/test/results/clientpositive/input12.q.out index 3bb765c..470bbf2 100644 --- a/ql/src/test/results/clientpositive/input12.q.out +++ b/ql/src/test/results/clientpositive/input12.q.out @@ -40,23 +40,18 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-14 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 + Stage-15 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 + Stage-16 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-1 depends on stages: Stage-12, Stage-11, Stage-14 + Stage-1 depends on stages: Stage-3 Stage-10 depends on stages: Stage-1 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 - Stage-21 depends on stages: Stage-3 , consists of Stage-18, Stage-17, Stage-19 - Stage-18 - Stage-2 depends on stages: Stage-18, Stage-17, Stage-20 - Stage-16 depends on stages: Stage-2 - Stage-17 - Stage-19 - Stage-20 depends on stages: Stage-19 + Stage-11 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-3 + Stage-12 depends on stages: Stage-2 + Stage-13 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -80,6 +75,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -95,6 +103,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (key >= 200) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -110,6 +133,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -133,6 +185,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-15 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-16 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest3 + Stage: Stage-5 Map Reduce Map Operator Tree: @@ -163,15 +236,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -189,40 +253,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-21 - Conditional Operator - - Stage: Stage-18 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -237,38 +284,37 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 - Stage: Stage-16 + Stage: Stage-12 Stats-Aggr Operator - Stage: Stage-17 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-19 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-20 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 diff --git a/ql/src/test/results/clientpositive/input13.q.out b/ql/src/test/results/clientpositive/input13.q.out index 91ff234..1bc031d 100644 --- a/ql/src/test/results/clientpositive/input13.q.out +++ b/ql/src/test/results/clientpositive/input13.q.out @@ -42,29 +42,19 @@ STAGE DEPENDENCIES: Stage-7 Stage-0 depends on stages: Stage-7, Stage-6, Stage-9 Stage-5 depends on stages: Stage-0 + Stage-15 depends on stages: Stage-5, Stage-11, Stage-12, Stage-13, Stage-14, Stage-3 + Stage-16 depends on stages: Stage-5, Stage-11, Stage-12, Stage-13, Stage-14, Stage-3 + Stage-17 depends on stages: Stage-5, Stage-11, Stage-12, Stage-13, Stage-14, Stage-3 Stage-6 Stage-8 Stage-9 depends on stages: Stage-8 - Stage-16 depends on stages: Stage-4 , consists of Stage-13, Stage-12, Stage-14 - Stage-13 - Stage-1 depends on stages: Stage-13, Stage-12, Stage-15 + Stage-1 depends on stages: Stage-4 Stage-11 depends on stages: Stage-1 - Stage-12 - Stage-14 - Stage-15 depends on stages: Stage-14 - Stage-22 depends on stages: Stage-4 , consists of Stage-19, Stage-18, Stage-20 - Stage-19 - Stage-2 depends on stages: Stage-19, Stage-18, Stage-21 - Stage-17 depends on stages: Stage-2 - Stage-18 - Stage-20 - Stage-21 depends on stages: Stage-20 - Stage-27 depends on stages: Stage-4 , consists of Stage-24, Stage-23, Stage-25 - Stage-24 - Stage-3 depends on stages: Stage-24, Stage-23, Stage-26 - Stage-23 - Stage-25 - Stage-26 depends on stages: Stage-25 + Stage-12 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-4 + Stage-13 depends on stages: Stage-2 + Stage-14 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-4 @@ -88,6 +78,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -103,6 +106,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((key >= 200) and (key < 300)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -118,6 +136,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (key >= 300) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -132,6 +166,19 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-10 Conditional Operator @@ -155,6 +202,27 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-15 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-16 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-17 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest3 + Stage: Stage-6 Map Reduce Map Operator Tree: @@ -185,15 +253,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-16 - Conditional Operator - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -211,40 +270,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-14 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-15 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-22 - Conditional Operator - - Stage: Stage-19 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -259,47 +301,37 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 - Stage: Stage-17 + Stage: Stage-13 Stats-Aggr Operator - Stage: Stage-18 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-20 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-21 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-27 - Conditional Operator - - Stage: Stage-24 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### Stage: Stage-3 Move Operator @@ -307,34 +339,6 @@ STAGE PLANS: hdfs directory: true destination: target/warehouse/dest4.out - Stage: Stage-23 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-25 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-26 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 diff --git a/ql/src/test/results/clientpositive/input14.q.out b/ql/src/test/results/clientpositive/input14.q.out index af04a98..e5120fc 100644 --- a/ql/src/test/results/clientpositive/input14.q.out +++ b/ql/src/test/results/clientpositive/input14.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -69,6 +71,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -83,6 +100,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src SELECT TRANSFORM(src.key, src.value) diff --git a/ql/src/test/results/clientpositive/input14_limit.q.out b/ql/src/test/results/clientpositive/input14_limit.q.out index 9870ad5..828a094 100644 --- a/ql/src/test/results/clientpositive/input14_limit.q.out +++ b/ql/src/test/results/clientpositive/input14_limit.q.out @@ -29,6 +29,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -53,7 +55,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -79,7 +80,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -104,6 +104,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -118,6 +133,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src SELECT TRANSFORM(src.key, src.value) diff --git a/ql/src/test/results/clientpositive/input17.q.out b/ql/src/test/results/clientpositive/input17.q.out index 057a92d..8ecf7f9 100644 --- a/ql/src/test/results/clientpositive/input17.q.out +++ b/ql/src/test/results/clientpositive/input17.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,6 +68,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -80,6 +97,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src_thrift SELECT TRANSFORM(src_thrift.aint + src_thrift.lint[0], src_thrift.lintstring[0]) diff --git a/ql/src/test/results/clientpositive/input18.q.out b/ql/src/test/results/clientpositive/input18.q.out index b341510..be98cda 100644 --- a/ql/src/test/results/clientpositive/input18.q.out +++ b/ql/src/test/results/clientpositive/input18.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -69,6 +71,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -83,6 +100,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src SELECT TRANSFORM(src.key, src.value, 1+2, 3+4) diff --git a/ql/src/test/results/clientpositive/input1_limit.q.out b/ql/src/test/results/clientpositive/input1_limit.q.out index 0ca1552..4e69b72 100644 --- a/ql/src/test/results/clientpositive/input1_limit.q.out +++ b/ql/src/test/results/clientpositive/input1_limit.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -52,7 +56,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Filter Operator predicate: (key < 100) (type: boolean) @@ -90,6 +93,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,14 +122,49 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -133,6 +186,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -144,9 +212,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 100 LIMIT 10 INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key < 100 LIMIT 5 diff --git a/ql/src/test/results/clientpositive/input20.q.out b/ql/src/test/results/clientpositive/input20.q.out index cf0ee1d..076deaf 100644 --- a/ql/src/test/results/clientpositive/input20.q.out +++ b/ql/src/test/results/clientpositive/input20.q.out @@ -36,6 +36,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -85,6 +87,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -99,6 +116,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src MAP src.key, src.key diff --git a/ql/src/test/results/clientpositive/input30.q.out b/ql/src/test/results/clientpositive/input30.q.out index 130f22d..a9fb038 100644 --- a/ql/src/test/results/clientpositive/input30.q.out +++ b/ql/src/test/results/clientpositive/input30.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -64,6 +65,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tst_dest30 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -78,6 +99,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: a + Column Types: int + Table: default.tst_dest30 + PREHOOK: query: insert overwrite table dest30 select count(1) from src PREHOOK: type: QUERY @@ -110,6 +138,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -147,6 +176,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest30 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -161,6 +210,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: a + Column Types: int + Table: default.dest30 + PREHOOK: query: insert overwrite table dest30 select count(1) from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input31.q.out b/ql/src/test/results/clientpositive/input31.q.out index 264ebe5..0fe569b 100644 --- a/ql/src/test/results/clientpositive/input31.q.out +++ b/ql/src/test/results/clientpositive/input31.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -66,6 +67,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tst_dest31 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -80,6 +101,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: a + Column Types: int + Table: default.tst_dest31 + PREHOOK: query: insert overwrite table dest31 select count(1) from srcbucket PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input32.q.out b/ql/src/test/results/clientpositive/input32.q.out index c8fdfd4..97a2f22 100644 --- a/ql/src/test/results/clientpositive/input32.q.out +++ b/ql/src/test/results/clientpositive/input32.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -63,6 +64,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tst_dest32 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -77,6 +98,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: a + Column Types: int + Table: default.tst_dest32 + PREHOOK: query: insert overwrite table dest32 select count(1) from srcbucket PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input33.q.out b/ql/src/test/results/clientpositive/input33.q.out index b35e2d0..9d6fcc0 100644 --- a/ql/src/test/results/clientpositive/input33.q.out +++ b/ql/src/test/results/clientpositive/input33.q.out @@ -36,6 +36,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -85,6 +87,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -99,6 +116,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src MAP src.key, src.key diff --git a/ql/src/test/results/clientpositive/input34.q.out b/ql/src/test/results/clientpositive/input34.q.out index 72f66c3..201da7e 100644 --- a/ql/src/test/results/clientpositive/input34.q.out +++ b/ql/src/test/results/clientpositive/input34.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -64,6 +65,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -87,6 +114,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input35.q.out b/ql/src/test/results/clientpositive/input35.q.out index 8b86991..7509d14 100644 --- a/ql/src/test/results/clientpositive/input35.q.out +++ b/ql/src/test/results/clientpositive/input35.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -64,6 +65,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -87,6 +114,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input36.q.out b/ql/src/test/results/clientpositive/input36.q.out index 76921ad..d018fbe 100644 --- a/ql/src/test/results/clientpositive/input36.q.out +++ b/ql/src/test/results/clientpositive/input36.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -64,6 +65,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -87,6 +114,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input38.q.out b/ql/src/test/results/clientpositive/input38.q.out index 0c4e81d..36efd44 100644 --- a/ql/src/test/results/clientpositive/input38.q.out +++ b/ql/src/test/results/clientpositive/input38.q.out @@ -28,6 +28,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -58,6 +59,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -81,6 +108,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input3_limit.q.out b/ql/src/test/results/clientpositive/input3_limit.q.out index c7bc4ac..19e5b79 100644 --- a/ql/src/test/results/clientpositive/input3_limit.q.out +++ b/ql/src/test/results/clientpositive/input3_limit.q.out @@ -41,6 +41,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -58,7 +60,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -81,7 +82,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -99,6 +99,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -113,6 +128,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.t2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20 PREHOOK: type: QUERY PREHOOK: Input: default@t1 diff --git a/ql/src/test/results/clientpositive/input5.q.out b/ql/src/test/results/clientpositive/input5.q.out index a399528..c137f8b 100644 --- a/ql/src/test/results/clientpositive/input5.q.out +++ b/ql/src/test/results/clientpositive/input5.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,6 +68,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -80,6 +97,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src_thrift SELECT TRANSFORM(src_thrift.lint, src_thrift.lintstring) diff --git a/ql/src/test/results/clientpositive/input6.q.out b/ql/src/test/results/clientpositive/input6.q.out index 3d1a815..cf007cf 100644 --- a/ql/src/test/results/clientpositive/input6.q.out +++ b/ql/src/test/results/clientpositive/input6.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input7.q.out b/ql/src/test/results/clientpositive/input7.q.out index 0545b1f..380382f 100644 --- a/ql/src/test/results/clientpositive/input7.q.out +++ b/ql/src/test/results/clientpositive/input7.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: int) + outputColumnNames: c1, c2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2 + Column Types: double, int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input8.q.out b/ql/src/test/results/clientpositive/input8.q.out index d76fc2b..a16613f 100644 --- a/ql/src/test/results/clientpositive/input8.q.out +++ b/ql/src/test/results/clientpositive/input8.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: int), _col2 (type: double) + outputColumnNames: c1, c2, c3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3 + Column Types: string, int, double + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input9.q.out b/ql/src/test/results/clientpositive/input9.q.out index af752e0..fda109a 100644 --- a/ql/src/test/results/clientpositive/input9.q.out +++ b/ql/src/test/results/clientpositive/input9.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: void), _col1 (type: int) + outputColumnNames: value, key + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 16), compute_stats(key, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: value, key + Column Types: string, int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input_part1.q.out b/ql/src/test/results/clientpositive/input_part1.q.out index 2f6f302..00b294e 100644 --- a/ql/src/test/results/clientpositive/input_part1.q.out +++ b/ql/src/test/results/clientpositive/input_part1.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -72,6 +73,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -125,6 +142,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -167,6 +213,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input_part10.q.out b/ql/src/test/results/clientpositive/input_part10.q.out index 4b552ba..b9a5127 100644 --- a/ql/src/test/results/clientpositive/input_part10.q.out +++ b/ql/src/test/results/clientpositive/input_part10.q.out @@ -30,6 +30,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,7 +50,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int), _col1 (type: int) Reduce Operator Tree: Select Operator @@ -58,6 +59,7 @@ STAGE PLANS: Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +<<<<<<< HEAD Select Operator expressions: UDFToString(_col0) (type: string), UDFToString(_col1) (type: string) outputColumnNames: _col0, _col1 @@ -70,6 +72,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_special +======= + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.part_special + Select Operator + expressions: _col0 (type: int), _col1 (type: int), '2008 04 08' (type: string), '10:11:12=455' (type: string) + outputColumnNames: a, b, ds, ts + Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16) + keys: ds (type: string), ts (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1174 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +>>>>>>> out Stage: Stage-0 Move Operator @@ -87,6 +115,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: string, string + Table: default.part_special + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1174 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455') SELECT 1, 2 FROM src LIMIT 1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input_part2.q.out b/ql/src/test/results/clientpositive/input_part2.q.out index 95f36ab..866636f 100644 --- a/ql/src/test/results/clientpositive/input_part2.q.out +++ b/ql/src/test/results/clientpositive/input_part2.q.out @@ -30,16 +30,14 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-3, Stage-9, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-1 depends on stages: Stage-2 Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -89,6 +87,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12' (type: string), '2008-04-08' (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: ((key < 100) and (ds = '2008-04-09')) (type: boolean) @@ -129,6 +143,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12' (type: string), '2008-04-09' (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -231,6 +272,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -273,6 +343,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest2 + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -453,15 +539,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -499,180 +576,69 @@ STAGE PLANS: Map Operator Tree: TableScan GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - name: default.dest2 + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - name: default.dest2 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + MultiFileSpray: false PREHOOK: query: FROM srcpart INSERT OVERWRITE TABLE dest1 SELECT srcpart.key, srcpart.value, srcpart.hr, srcpart.ds WHERE srcpart.key < 100 and srcpart.ds = '2008-04-08' and srcpart.hr = '12' diff --git a/ql/src/test/results/clientpositive/input_part5.q.out b/ql/src/test/results/clientpositive/input_part5.q.out index c6ae2fd..42e1ea5 100644 --- a/ql/src/test/results/clientpositive/input_part5.q.out +++ b/ql/src/test/results/clientpositive/input_part5.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: string, string, string, string + Table: default.tmptable + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input_testsequencefile.q.out b/ql/src/test/results/clientpositive/input_testsequencefile.q.out index 60aaf83..b3f3def 100644 --- a/ql/src/test/results/clientpositive/input_testsequencefile.q.out +++ b/ql/src/test/results/clientpositive/input_testsequencefile.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest4_sequencefile + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest4_sequencefile + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input_testxpath.q.out b/ql/src/test/results/clientpositive/input_testxpath.q.out index e07628a..b81b56c 100644 --- a/ql/src/test/results/clientpositive/input_testxpath.q.out +++ b/ql/src/test/results/clientpositive/input_testxpath.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, mapvalue + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(mapvalue, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, mapvalue + Column Types: int, string, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input_testxpath2.q.out b/ql/src/test/results/clientpositive/input_testxpath2.q.out index a0baccf..373fcb0 100644 --- a/ql/src/test/results/clientpositive/input_testxpath2.q.out +++ b/ql/src/test/results/clientpositive/input_testxpath2.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: lint_size, lintstring_size, mstringstring_size + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(lint_size, 16), compute_stats(lintstring_size, 16), compute_stats(mstringstring_size, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: lint_size, lintstring_size, mstringstring_size + Column Types: int, int, int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/insert1.q.out b/ql/src/test/results/clientpositive/insert1.q.out index 3952578..685bfe2 100644 --- a/ql/src/test/results/clientpositive/insert1.q.out +++ b/ql/src/test/results/clientpositive/insert1.q.out @@ -34,6 +34,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -60,6 +61,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -83,6 +110,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -123,6 +157,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -149,6 +184,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -172,6 +233,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -226,6 +294,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -252,6 +321,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -275,6 +370,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -315,6 +417,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -341,6 +444,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -364,6 +493,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -410,16 +546,14 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-3, Stage-9, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-1 depends on stages: Stage-2 Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -443,6 +577,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -458,6 +605,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -481,6 +656,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -511,15 +700,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -537,31 +717,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: x.insert1 - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: x.insert1 - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: CREATE DATABASE db2 PREHOOK: type: CREATEDATABASE diff --git a/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out b/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out index 49c1269..5231349 100644 --- a/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out +++ b/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out @@ -42,6 +42,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -58,7 +60,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -75,6 +76,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2011-11-11' (type: string), '11' (type: string) + outputColumnNames: one, two, ds, hr + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 16), compute_stats(two, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -92,6 +109,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: default.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE destinTable PARTITION (ds='2011-11-11', hr='11') if not exists SELECT one,two FROM sourceTable WHERE ds='2011-11-11' AND hr='11' order by one desc, two desc limit 5 PREHOOK: type: QUERY @@ -175,6 +228,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -191,7 +246,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -208,6 +262,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: one, two + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 16), compute_stats(two, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -222,6 +291,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: default.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE destinTable SELECT one,two FROM sourceTable WHERE ds='2011-11-11' AND hr='11' order by one desc, two desc limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@sourcetable diff --git a/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out b/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out index b5f28d2..15e6d76 100644 --- a/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out +++ b/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out @@ -53,6 +53,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -69,7 +71,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -86,6 +87,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: db2.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2011-11-11' (type: string) + outputColumnNames: one, two, ds + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 16), compute_stats(two, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -102,6 +119,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: db2.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE db2.destinTable PARTITION (ds='2011-11-11') SELECT one,two FROM db1.sourceTable WHERE ds='2011-11-11' order by one desc, two desc limit 5 PREHOOK: type: QUERY @@ -141,6 +194,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -157,7 +212,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -174,6 +228,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: db2.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2011-11-11' (type: string) + outputColumnNames: one, two, ds + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 16), compute_stats(two, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -190,6 +260,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: db2.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE db2.destinTable PARTITION (ds='2011-11-11') SELECT one,two FROM db1.sourceTable WHERE ds='2011-11-11' order by one desc, two desc limit 5 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/insert_into1.q.out b/ql/src/test/results/clientpositive/insert_into1.q.out index da863a7..46c0512 100644 --- a/ql/src/test/results/clientpositive/insert_into1.q.out +++ b/ql/src/test/results/clientpositive/insert_into1.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -34,7 +36,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -56,6 +57,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -70,6 +86,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * from src ORDER BY key LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -126,6 +171,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -142,7 +189,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -164,6 +210,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -178,6 +239,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -234,6 +324,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -250,7 +342,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -272,6 +363,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -286,6 +392,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -344,6 +479,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -368,6 +504,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 1 (type: int), 'a' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -391,6 +553,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -441,6 +610,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -465,6 +635,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 2 (type: int), 'b' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -488,6 +684,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/insert_into2.q.out b/ql/src/test/results/clientpositive/insert_into2.q.out index 46fab7b..0469bfe 100644 --- a/ql/src/test/results/clientpositive/insert_into2.q.out +++ b/ql/src/test/results/clientpositive/insert_into2.q.out @@ -22,6 +22,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -38,7 +40,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -60,6 +61,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -76,6 +93,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into2 PARTITION (ds='1') SELECT * FROM src order by key limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -171,6 +224,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -187,7 +242,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -209,6 +263,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -225,6 +295,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 100 PREHOOK: type: QUERY @@ -289,6 +395,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -305,7 +413,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -327,6 +434,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -343,6 +466,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 50 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/insert_into3.q.out b/ql/src/test/results/clientpositive/insert_into3.q.out index ae7523b..e1e943d 100644 --- a/ql/src/test/results/clientpositive/insert_into3.q.out +++ b/ql/src/test/results/clientpositive/insert_into3.q.out @@ -32,9 +32,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -51,7 +55,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -82,6 +85,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -96,15 +114,50 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3a + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3b + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -125,6 +178,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -136,9 +204,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT INTO TABLE insert_into3a SELECT * ORDER BY key, value LIMIT 50 INSERT INTO TABLE insert_into3b SELECT * ORDER BY key, value LIMIT 100 PREHOOK: type: QUERY @@ -191,9 +281,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -212,7 +306,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Select Operator expressions: key (type: string), value (type: string) @@ -247,6 +340,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -261,14 +369,49 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3a + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3b + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -290,6 +433,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -301,9 +459,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE insert_into3a SELECT * LIMIT 10 INSERT INTO TABLE insert_into3b SELECT * LIMIT 10 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/insert_into4.q.out b/ql/src/test/results/clientpositive/insert_into4.q.out index bb4e557..c24ecdd 100644 --- a/ql/src/test/results/clientpositive/insert_into4.q.out +++ b/ql/src/test/results/clientpositive/insert_into4.q.out @@ -30,6 +30,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,7 +50,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -70,6 +71,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into4a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -84,6 +100,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into4a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into4a SELECT * FROM src LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -115,6 +160,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -133,7 +180,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -155,6 +201,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into4a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -169,6 +230,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into4a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into4a SELECT * FROM src LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -202,6 +292,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -225,6 +316,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into4b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -248,6 +365,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into4b + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/insert_into5.q.out b/ql/src/test/results/clientpositive/insert_into5.q.out index 7b471f4..1eaebc7 100644 --- a/ql/src/test/results/clientpositive/insert_into5.q.out +++ b/ql/src/test/results/clientpositive/insert_into5.q.out @@ -30,6 +30,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,7 +50,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -66,6 +67,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -80,6 +96,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into5a SELECT 1, 'one' FROM src LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -113,6 +158,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -136,6 +182,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -159,6 +231,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5a + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -224,6 +303,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -247,6 +327,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -272,6 +386,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5b + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -339,6 +460,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -362,6 +484,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -387,6 +543,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5b + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/insert_into6.q.out b/ql/src/test/results/clientpositive/insert_into6.q.out index d93a167..ba062f1 100644 --- a/ql/src/test/results/clientpositive/insert_into6.q.out +++ b/ql/src/test/results/clientpositive/insert_into6.q.out @@ -32,6 +32,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -50,7 +52,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -72,6 +73,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into6a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -88,6 +105,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into6a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 75 Data size: 750 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 75 Data size: 750 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 75 Data size: 750 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into6a PARTITION (ds='1') SELECT * FROM src LIMIT 150 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -137,6 +190,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -160,6 +214,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into6b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -185,6 +273,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into6b + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out b/ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out index 24db44f..50d638b 100644 --- a/ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out +++ b/ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out @@ -305,6 +305,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} numFiles 1 numRows 0 rawDataSize 0 @@ -427,6 +428,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} numFiles 2 numRows 0 rawDataSize 0 @@ -677,7 +679,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -869,6 +871,7 @@ Database: default Table: sp #### A masked pattern was here #### Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 0 rawDataSize 0 @@ -920,7 +923,7 @@ Database: default Table: sp #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 502 rawDataSize 5318 diff --git a/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out b/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out index c2732c8..db70465 100644 --- a/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out +++ b/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out @@ -100,6 +100,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -130,6 +132,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.temp1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: change, num + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(change, 16), compute_stats(num, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -144,6 +161,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: change, num + Column Types: string, string + Table: default.temp1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: CREATE TABLE temp2 ( create_ts STRING , @@ -178,6 +224,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -209,6 +257,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.temp2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: create_ts, change, num + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(create_ts, 16), compute_stats(change, 16), compute_stats(num, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -223,6 +286,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: create_ts, change, num + Column Types: string, string, string + Table: default.temp2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: select * from bucketoutput1 a join bucketoutput2 b on (a.data=b.data) PREHOOK: type: QUERY PREHOOK: Input: default@bucketoutput1 diff --git a/ql/src/test/results/clientpositive/join14.q.out b/ql/src/test/results/clientpositive/join14.q.out index 66e42f1..03c3dba 100644 --- a/ql/src/test/results/clientpositive/join14.q.out +++ b/ql/src/test/results/clientpositive/join14.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +77,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c1, c2 + Statistics: Num rows: 366 Data size: 3890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +106,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join17.q.out b/ql/src/test/results/clientpositive/join17.q.out index 8bd67eb..6b3e4d7a 100644 --- a/ql/src/test/results/clientpositive/join17.q.out +++ b/ql/src/test/results/clientpositive/join17.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -162,6 +164,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(value1, 16), compute_stats(key2, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -195,6 +224,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join2.q.out b/ql/src/test/results/clientpositive/join2.q.out index e3d26a2..ffdd2b5 100644 --- a/ql/src/test/results/clientpositive/join2.q.out +++ b/ql/src/test/results/clientpositive/join2.q.out @@ -19,6 +19,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -117,6 +119,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -131,6 +148,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join25.q.out b/ql/src/test/results/clientpositive/join25.q.out index 5ad95c5..99b2bb1 100644 --- a/ql/src/test/results/clientpositive/join25.q.out +++ b/ql/src/test/results/clientpositive/join25.q.out @@ -17,13 +17,15 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +81,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +112,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join26.q.out b/ql/src/test/results/clientpositive/join26.q.out index 5bc8245..647a9a0 100644 --- a/ql/src/test/results/clientpositive/join26.q.out +++ b/ql/src/test/results/clientpositive/join26.q.out @@ -19,13 +19,15 @@ FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.key = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-5 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:y @@ -74,7 +76,7 @@ STAGE PLANS: 2 _col0 (type: string) Position of Big Table: 2 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -136,6 +138,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -316,6 +345,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join27.q.out b/ql/src/test/results/clientpositive/join27.q.out index 8b43f3f..0457943 100644 --- a/ql/src/test/results/clientpositive/join27.q.out +++ b/ql/src/test/results/clientpositive/join27.q.out @@ -17,13 +17,15 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.value = y.value) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +81,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +112,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.value = y.value) diff --git a/ql/src/test/results/clientpositive/join28.q.out b/ql/src/test/results/clientpositive/join28.q.out index 309bdcd..58ffed0 100644 --- a/ql/src/test/results/clientpositive/join28.q.out +++ b/ql/src/test/results/clientpositive/join28.q.out @@ -23,13 +23,15 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:z @@ -70,7 +72,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -115,6 +117,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -131,6 +148,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq.key1, z.value FROM diff --git a/ql/src/test/results/clientpositive/join29.q.out b/ql/src/test/results/clientpositive/join29.q.out index b53143d..7b69c4f 100644 --- a/ql/src/test/results/clientpositive/join29.q.out +++ b/ql/src/test/results/clientpositive/join29.q.out @@ -20,15 +20,17 @@ FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, Stage-2 - Stage-8 has a backup stage: Stage-2 - Stage-5 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-2, Stage-5, Stage-6 - Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-1, Stage-5 , consists of Stage-9, Stage-10, Stage-2 Stage-9 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-2, Stage-6, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2, Stage-6, Stage-7 + Stage-10 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-10 Stage-2 - Stage-4 is a root stage + Stage-5 is a root stage STAGE PLANS: Stage: Stage-1 @@ -70,10 +72,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME1 @@ -87,7 +89,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -111,6 +113,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt1, 16), compute_stats(cnt2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -127,7 +144,36 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-9 + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, cnt1, cnt2 + Column Types: string, int, int + Table: default.dest_j1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -141,7 +187,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -165,6 +211,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt1, 16), compute_stats(cnt2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -206,8 +267,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt1, 16), compute_stats(cnt2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/join3.q.out b/ql/src/test/results/clientpositive/join3.q.out index fb378f4..f3d198d 100644 --- a/ql/src/test/results/clientpositive/join3.q.out +++ b/ql/src/test/results/clientpositive/join3.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -92,6 +94,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -106,6 +123,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join30.q.out b/ql/src/test/results/clientpositive/join30.q.out index 3bd6db1..b82e287 100644 --- a/ql/src/test/results/clientpositive/join30.q.out +++ b/ql/src/test/results/clientpositive/join30.q.out @@ -15,13 +15,15 @@ INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 + Stage-7 is a root stage + Stage-2 depends on stages: Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_0:x @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest_j1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join31.q.out b/ql/src/test/results/clientpositive/join31.q.out index 32eab4f..a36baa1 100644 --- a/ql/src/test/results/clientpositive/join31.q.out +++ b/ql/src/test/results/clientpositive/join31.q.out @@ -22,10 +22,12 @@ group by subq1.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-8 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-4, Stage-5 + Stage-5 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -60,7 +62,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_1:y @@ -133,6 +135,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -147,6 +164,35 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.dest_j1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, count(1) as cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN diff --git a/ql/src/test/results/clientpositive/join32.q.out b/ql/src/test/results/clientpositive/join32.q.out index 0358968..418ae10 100644 --- a/ql/src/test/results/clientpositive/join32.q.out +++ b/ql/src/test/results/clientpositive/join32.q.out @@ -19,13 +19,15 @@ FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:y @@ -72,7 +74,7 @@ STAGE PLANS: 1 _col1 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -141,6 +143,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -321,6 +350,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join33.q.out b/ql/src/test/results/clientpositive/join33.q.out index 0358968..418ae10 100644 --- a/ql/src/test/results/clientpositive/join33.q.out +++ b/ql/src/test/results/clientpositive/join33.q.out @@ -19,13 +19,15 @@ FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:y @@ -72,7 +74,7 @@ STAGE PLANS: 1 _col1 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -141,6 +143,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -321,6 +350,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join34.q.out b/ql/src/test/results/clientpositive/join34.q.out index 117afac..f1d7c40 100644 --- a/ql/src/test/results/clientpositive/join34.q.out +++ b/ql/src/test/results/clientpositive/join34.q.out @@ -27,13 +27,15 @@ FROM JOIN src1 x ON (x.key = subq1.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-5 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:x @@ -59,7 +61,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -121,6 +123,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false TableScan alias: x1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -180,6 +209,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -312,6 +368,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, x.value, subq1.value FROM diff --git a/ql/src/test/results/clientpositive/join35.q.out b/ql/src/test/results/clientpositive/join35.q.out index 1f90df9..f75a09e 100644 --- a/ql/src/test/results/clientpositive/join35.q.out +++ b/ql/src/test/results/clientpositive/join35.q.out @@ -28,11 +28,13 @@ JOIN src1 x ON (x.key = subq1.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1, Stage-4 - Stage-6 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-1, Stage-5 + Stage-7 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-7 Stage-3 depends on stages: Stage-0 - Stage-4 is a root stage + Stage-9 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-7 + Stage-5 is a root stage STAGE PLANS: Stage: Stage-1 @@ -143,7 +145,7 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:x @@ -169,7 +171,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -221,6 +223,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false TableScan GatherStats: false Union @@ -270,6 +299,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -299,7 +355,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe #### A masked pattern was here #### Partition - base file name: -mr-10003 + base file name: -mr-10004 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: @@ -400,10 +456,87 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, int + Table: default.dest_j1 + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan alias: x1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false diff --git a/ql/src/test/results/clientpositive/join36.q.out b/ql/src/test/results/clientpositive/join36.q.out index 43a091f..9971a9a 100644 --- a/ql/src/test/results/clientpositive/join36.q.out +++ b/ql/src/test/results/clientpositive/join36.q.out @@ -57,13 +57,15 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -86,7 +88,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -119,6 +121,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -135,6 +152,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, int, int + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join37.q.out b/ql/src/test/results/clientpositive/join37.q.out index b0a2ee3..eb50db5 100644 --- a/ql/src/test/results/clientpositive/join37.q.out +++ b/ql/src/test/results/clientpositive/join37.q.out @@ -17,13 +17,15 @@ SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +81,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +112,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join39.q.out b/ql/src/test/results/clientpositive/join39.q.out index 628a44a..8dbd2ba 100644 --- a/ql/src/test/results/clientpositive/join39.q.out +++ b/ql/src/test/results/clientpositive/join39.q.out @@ -17,13 +17,15 @@ SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -72,6 +74,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, key1, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(key1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -88,6 +105,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value, key1, val2 + Column Types: string, string, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join4.q.out b/ql/src/test/results/clientpositive/join4.q.out index d9510f5..83f8f75 100644 --- a/ql/src/test/results/clientpositive/join4.q.out +++ b/ql/src/test/results/clientpositive/join4.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/join43.q.out b/ql/src/test/results/clientpositive/join43.q.out index e8c7278..c858c83 100644 --- a/ql/src/test/results/clientpositive/join43.q.out +++ b/ql/src/test/results/clientpositive/join43.q.out @@ -209,13 +209,13 @@ from ( ) list POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -275,7 +275,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -306,16 +306,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - TableScan alias: events Statistics: Num rows: 6 Data size: 79 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -330,22 +324,32 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 6 Data size: 79 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col2 (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string), _col2 (type: int) - 1 _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0 + 0 _col0 (type: string), _col1 (type: int) + 1 _col0 (type: string), _col2 (type: int) + outputColumnNames: _col2 Statistics: Num rows: 6 Data size: 86 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col2 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 86 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 86 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -427,13 +431,13 @@ from ( ) list POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -493,7 +497,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -520,17 +524,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - TableScan alias: events Statistics: Num rows: 6 Data size: 79 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -546,17 +543,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col3 (type: int) Statistics: Num rows: 6 Data size: 79 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col2 (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string), _col2 (type: int) - 1 _col0 (type: string), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2, _col4, _col5 + 0 _col0 (type: string), _col3 (type: int) + 1 _col0 (type: string), _col2 (type: int) + outputColumnNames: _col1, _col2, _col4, _col5, _col6 Statistics: Num rows: 6 Data size: 86 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col4 (type: string), _col5 (type: int) + expressions: _col4 (type: string), _col5 (type: int), _col6 (type: int), _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 6 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/join5.q.out b/ql/src/test/results/clientpositive/join5.q.out index bf1cde3..4469da6 100644 --- a/ql/src/test/results/clientpositive/join5.q.out +++ b/ql/src/test/results/clientpositive/join5.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/join6.q.out b/ql/src/test/results/clientpositive/join6.q.out index 77c8c3a..19b12e8 100644 --- a/ql/src/test/results/clientpositive/join6.q.out +++ b/ql/src/test/results/clientpositive/join6.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/join7.q.out b/ql/src/test/results/clientpositive/join7.q.out index f0f437c..31a6032 100644 --- a/ql/src/test/results/clientpositive/join7.q.out +++ b/ql/src/test/results/clientpositive/join7.q.out @@ -50,6 +50,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -126,6 +128,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -140,6 +157,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6 + Column Types: int, string, int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/join8.q.out b/ql/src/test/results/clientpositive/join8.q.out index c1035b4..7640597 100644 --- a/ql/src/test/results/clientpositive/join8.q.out +++ b/ql/src/test/results/clientpositive/join8.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -101,6 +103,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -115,6 +132,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/join9.q.out b/ql/src/test/results/clientpositive/join9.q.out index befc47a..ea9380f 100644 --- a/ql/src/test/results/clientpositive/join9.q.out +++ b/ql/src/test/results/clientpositive/join9.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -210,6 +212,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -243,6 +272,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join_map_ppr.q.out b/ql/src/test/results/clientpositive/join_map_ppr.q.out index dba176d..46986ee 100644 --- a/ql/src/test/results/clientpositive/join_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/join_map_ppr.q.out @@ -27,6 +27,7 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-4 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 @@ -131,6 +132,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -186,6 +203,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-9 Conditional Operator @@ -228,6 +274,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + Stage: Stage-5 Map Reduce Map Operator Tree: @@ -605,6 +659,7 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-4 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 @@ -688,7 +743,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -709,6 +764,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -764,6 +835,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-9 Conditional Operator @@ -783,7 +883,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -806,6 +906,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + Stage: Stage-5 Map Reduce Map Operator Tree: @@ -820,7 +928,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -850,7 +958,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -871,7 +979,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -907,7 +1015,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -937,7 +1045,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -958,7 +1066,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 diff --git a/ql/src/test/results/clientpositive/lb_fs_stats.q.out b/ql/src/test/results/clientpositive/lb_fs_stats.q.out index b07192b..9bc96e4 100644 --- a/ql/src/test/results/clientpositive/lb_fs_stats.q.out +++ b/ql/src/test/results/clientpositive/lb_fs_stats.q.out @@ -46,7 +46,7 @@ Database: default Table: test_tab #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 4812 diff --git a/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out b/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out index 35e6827..7e25f6f 100644 --- a/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out +++ b/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out @@ -237,9 +237,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -279,6 +283,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -308,14 +327,49 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_2 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 value expressions: _col0 (type: string), _col1 (type: double) Reduce Operator Tree: Select Operator @@ -337,6 +391,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -348,6 +417,28 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_3 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/lineage1.q.out b/ql/src/test/results/clientpositive/lineage1.q.out index cc905d6..011828d 100644 --- a/ql/src/test/results/clientpositive/lineage1.q.out +++ b/ql/src/test/results/clientpositive/lineage1.q.out @@ -39,6 +39,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -111,6 +112,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_l1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE @@ -126,6 +140,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_l1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -149,6 +189,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_l1 + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out index f70f8b2..ff0ad8b 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -65,6 +66,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -167,6 +187,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -200,6 +254,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -240,7 +302,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -285,7 +347,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -357,7 +419,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out index 6920dcc..e3b6f36 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -120,6 +140,40 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -153,6 +207,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from src PREHOOK: type: QUERY @@ -195,7 +257,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 @@ -238,7 +300,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out index 55acbb7..990ba9f 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: col1, col2, col3, col4, col5, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(col1, 16), compute_stats(col2, 16), compute_stats(col3, 16), compute_stats(col4, 16), compute_stats(col5, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -120,6 +140,40 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types struct:struct:struct:struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -153,6 +207,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: col1, col2, col3, col4, col5 + Column Types: string, string, string, string, string + Table: default.list_bucketing_mul_col + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') select 1, key, 1, value, 1 from src PREHOOK: type: QUERY @@ -201,7 +263,7 @@ Database: default Table: list_bucketing_mul_col #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\"}} numFiles 4 numRows 500 rawDataSize 6312 @@ -246,7 +308,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}} bucket_count -1 column.name.delimiter , columns col1,col2,col3,col4,col5 @@ -338,7 +400,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}} bucket_count -1 column.name.delimiter , columns col1,col2,col3,col4,col5 diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out index 3a1d2a4..8455467 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) + outputColumnNames: col1, col2, col3, col4, col5, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(col1, 16), compute_stats(col2, 16), compute_stats(col3, 16), compute_stats(col4, 16), compute_stats(col5, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -120,6 +140,40 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types struct:struct:struct:struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -153,6 +207,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: col1, col2, col3, col4, col5 + Column Types: string, string, string, string, string + Table: default.list_bucketing_mul_col + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') select 1, key, 1, value, 1 from src PREHOOK: type: QUERY @@ -201,7 +263,7 @@ Database: default Table: list_bucketing_mul_col #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\"}} numFiles 4 numRows 500 rawDataSize 6312 @@ -246,7 +308,7 @@ STAGE PLANS: ds 2008-04-08 hr 2013-01-23+18:00:99 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}} bucket_count -1 column.name.delimiter , columns col1,col2,col3,col4,col5 diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_14.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_14.q.out index ab30b7d..13df7c9 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_14.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_14.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -65,6 +66,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -116,6 +133,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -149,6 +195,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing + Is Table Level Stats: true + PREHOOK: query: insert overwrite table list_bucketing select * from src PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -177,7 +231,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -283,7 +337,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -304,7 +358,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out index 01bc19c..7b72c66 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -250,7 +312,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 1000 rawDataSize 9624 @@ -311,7 +373,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out index 52646a2..de4e086 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -59,6 +60,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -161,6 +181,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -194,6 +248,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds='2008-04-08', hr='11') select key, value from srcpart where ds='2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -231,7 +293,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 1000 rawDataSize 10624 @@ -303,7 +365,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out index 84ada2b..9e8560a 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -250,7 +312,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 1000 rawDataSize 9624 @@ -286,6 +348,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -331,6 +394,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -433,6 +515,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -474,6 +590,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + Stage: Stage-3 Merge File Operator Map Operator Tree: @@ -624,7 +748,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 1000 rawDataSize 9624 @@ -685,7 +809,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out index 71d7e16..3e55d2a 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -65,6 +66,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -167,6 +187,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -200,6 +254,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -240,7 +302,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -285,7 +347,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -362,7 +424,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -408,7 +470,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out index 8e20ff1..5217a3f 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -254,7 +316,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 16 rawDataSize 136 @@ -297,7 +359,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 984 rawDataSize 9488 @@ -333,6 +395,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -379,6 +442,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -481,6 +563,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -523,6 +639,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + Stage: Stage-3 Merge File Operator Map Operator Tree: @@ -681,7 +805,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 16 rawDataSize 136 @@ -724,7 +848,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 984 rawDataSize 9488 @@ -785,7 +909,7 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -831,7 +955,7 @@ STAGE PLANS: ds 2008-04-08 hr b1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out index 58bf240..286a045 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -254,7 +316,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 16 rawDataSize 136 @@ -297,7 +359,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 984 rawDataSize 9488 @@ -333,6 +395,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -379,6 +442,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -481,6 +563,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -523,6 +639,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + Stage: Stage-3 Merge File Operator Map Operator Tree: @@ -681,7 +805,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 16 rawDataSize 136 @@ -724,7 +848,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 984 rawDataSize 9488 @@ -785,7 +909,7 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -831,7 +955,7 @@ STAGE PLANS: ds 2008-04-08 hr b1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out index b38d332..071f0de 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -254,7 +316,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 16 rawDataSize 136 @@ -297,7 +359,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 984 rawDataSize 9488 @@ -414,7 +476,7 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out index 624e3ac..221b3c9 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -250,7 +312,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 1000 rawDataSize 9624 @@ -286,6 +348,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -331,6 +394,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -433,6 +515,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -474,6 +590,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + Stage: Stage-3 Merge File Operator Map Operator Tree: @@ -624,7 +748,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 1000 rawDataSize 9624 @@ -685,7 +809,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out index 17c7afe..98b9b13 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out @@ -47,7 +47,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -97,7 +97,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -181,7 +181,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -266,7 +266,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -349,7 +349,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out index 5a326f6..4cafefa 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out @@ -47,7 +47,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -97,7 +97,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -181,7 +181,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -268,7 +268,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out index 3ff221d..9931afd 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out @@ -43,7 +43,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -103,7 +103,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -168,7 +168,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -211,7 +211,7 @@ STAGE PLANS: ds 1 hr 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -322,7 +322,7 @@ STAGE PLANS: ds 1 hr 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -408,7 +408,7 @@ STAGE PLANS: ds 1 hr 3 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/llap_acid.q.out b/ql/src/test/results/clientpositive/llap_acid.q.out index 5970fd7..e04af0e 100644 --- a/ql/src/test/results/clientpositive/llap_acid.q.out +++ b/ql/src/test/results/clientpositive/llap_acid.q.out @@ -91,18 +91,18 @@ STAGE PLANS: TableScan alias: orc_llap filterExpr: cint is not null (type: boolean) - Statistics: Num rows: 20 Data size: 296 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: cint is not null (type: boolean) - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: int) sort order: ++ - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized LLAP IO: may be used (ACID table) @@ -110,10 +110,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -141,24 +141,26 @@ POSTHOOK: Input: default@orc_llap@csmallint=1 POSTHOOK: Input: default@orc_llap@csmallint=2 POSTHOOK: Input: default@orc_llap@csmallint=3 #### A masked pattern was here #### --285355633 1 -1241163445 --109813638 1 -58941842 -164554497 1 1161977292 -199879534 1 123351087 -246423894 1 -1645852809 -354670578 1 562841852 -455419170 1 1108177470 -665801232 1 480783141 -708885482 1 -1645852809 --285355633 2 -1241163445 --109813638 2 -58941842 -164554497 2 1161977292 -199879534 2 123351087 -246423894 2 -1645852809 -354670578 2 562841852 -455419170 2 1108177470 -665801232 2 480783141 -708885482 2 -1645852809 +-970918963 1 -588508542 +-734267047 1 895807844 +-548534304 1 -1900081338 +-546972460 1 665899329 +-469581869 1 1033373031 +-101217409 1 183045850 +-37908611 1 -1378658304 +274816197 1 -437339127 +371876492 1 1862746855 +927956889 1 -935575737 +-970918963 2 -588508542 +-734267047 2 895807844 +-548534304 2 -1900081338 +-546972460 2 665899329 +-469581869 2 1033373031 +-101217409 2 183045850 +-37908611 2 -1378658304 +274816197 2 -437339127 +371876492 2 1862746855 +927956889 2 -935575737 -923308739 3 -1887561756 -3728 3 -1887561756 762 3 -1645852809 @@ -216,18 +218,18 @@ STAGE PLANS: TableScan alias: orc_llap filterExpr: cint is not null (type: boolean) - Statistics: Num rows: 20 Data size: 296 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: cint is not null (type: boolean) - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: int) sort order: ++ - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized LLAP IO: may be used (ACID table) @@ -235,10 +237,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -266,25 +268,27 @@ POSTHOOK: Input: default@orc_llap@csmallint=1 POSTHOOK: Input: default@orc_llap@csmallint=2 POSTHOOK: Input: default@orc_llap@csmallint=3 #### A masked pattern was here #### --285355633 1 -1241163445 --109813638 1 -58941842 +-970918963 1 -588508542 +-734267047 1 895807844 +-548534304 1 -1900081338 +-546972460 1 665899329 +-469581869 1 1033373031 +-101217409 1 183045850 +-37908611 1 -1378658304 1 1 2 -164554497 1 1161977292 -199879534 1 123351087 -246423894 1 -1645852809 -354670578 1 562841852 -455419170 1 1108177470 -665801232 1 480783141 -708885482 1 -1645852809 --285355633 2 -1241163445 --109813638 2 -58941842 -164554497 2 1161977292 -199879534 2 123351087 -246423894 2 -1645852809 -354670578 2 562841852 -455419170 2 1108177470 -665801232 2 480783141 -708885482 2 -1645852809 +274816197 1 -437339127 +371876492 1 1862746855 +927956889 1 -935575737 +-970918963 2 -588508542 +-734267047 2 895807844 +-548534304 2 -1900081338 +-546972460 2 665899329 +-469581869 2 1033373031 +-101217409 2 183045850 +-37908611 2 -1378658304 +274816197 2 -437339127 +371876492 2 1862746855 +927956889 2 -935575737 -923308739 3 -1887561756 -3728 3 -1887561756 762 3 -1645852809 diff --git a/ql/src/test/results/clientpositive/load_dyn_part1.q.out b/ql/src/test/results/clientpositive/load_dyn_part1.q.out index 84d806d..ed9f24c 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part1.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part1.q.out @@ -58,16 +58,14 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-3, Stage-9, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-1 depends on stages: Stage-2 Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -91,6 +89,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -106,6 +120,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-12-31' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -132,6 +180,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part1 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2 + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -162,15 +224,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -191,31 +244,30 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### PREHOOK: query: from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' diff --git a/ql/src/test/results/clientpositive/load_dyn_part10.q.out b/ql/src/test/results/clientpositive/load_dyn_part10.q.out index 99d3572..bbfa0bb 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part10.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part10.q.out @@ -46,6 +46,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -66,6 +67,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part10 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-12-31' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -83,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part10 + PREHOOK: query: from srcpart insert overwrite table nzhang_part10 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/load_dyn_part13.q.out b/ql/src/test/results/clientpositive/load_dyn_part13.q.out index 9e0ac6f..742dcf6 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part13.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part13.q.out @@ -60,6 +60,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -85,6 +86,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part13 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-03' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -105,6 +122,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part13 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-03' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -122,6 +173,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part13 + PREHOOK: query: insert overwrite table nzhang_part13 partition (ds="2010-03-03", hr) select * from ( select key, value, '22' diff --git a/ql/src/test/results/clientpositive/load_dyn_part14.q.out b/ql/src/test/results/clientpositive/load_dyn_part14.q.out index a6a5c63..bfc0c0e 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part14.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part14.q.out @@ -50,6 +50,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -71,7 +72,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Limit Number of rows: 2 @@ -101,6 +101,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) TableScan Union Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE @@ -112,6 +128,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) TableScan Union Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE @@ -123,6 +155,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1130 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1130 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 1130 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -148,6 +214,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.nzhang_part14 + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -192,7 +265,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Limit Number of rows: 2 @@ -222,7 +294,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Limit Number of rows: 2 diff --git a/ql/src/test/results/clientpositive/load_dyn_part2.q.out b/ql/src/test/results/clientpositive/load_dyn_part2.q.out index 93778a2..e80a262 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part2.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part2.q.out @@ -38,6 +38,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -68,6 +70,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part_bucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-23' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -85,6 +103,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part_bucket + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table nzhang_part_bucket partition (ds='2010-03-23', hr) select key, value, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/load_dyn_part3.q.out b/ql/src/test/results/clientpositive/load_dyn_part3.q.out index 3849100..4542fe0 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part3.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part3.q.out @@ -44,6 +44,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -64,6 +65,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -81,6 +116,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part3 + PREHOOK: query: insert overwrite table nzhang_part3 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/load_dyn_part4.q.out b/ql/src/test/results/clientpositive/load_dyn_part4.q.out index 40b0bbb..6c36346 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part4.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part4.q.out @@ -54,6 +54,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part4 + PREHOOK: query: insert overwrite table nzhang_part4 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/load_dyn_part8.q.out b/ql/src/test/results/clientpositive/load_dyn_part8.q.out index cb1a757..c6f6127 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part8.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part8.q.out @@ -48,8 +48,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-4, Stage-5 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -96,6 +99,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: (ds > '2008-04-08') (type: boolean) @@ -134,6 +156,34 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-12-31' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -334,6 +384,40 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -367,6 +451,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part8 + Is Table Level Stats: false + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part8 + Is Table Level Stats: false + Stage: Stage-1 Move Operator tables: @@ -399,6 +499,82 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: from srcpart insert overwrite table nzhang_part8 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' insert overwrite table nzhang_part8 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' diff --git a/ql/src/test/results/clientpositive/load_dyn_part9.q.out b/ql/src/test/results/clientpositive/load_dyn_part9.q.out index 414e784..8955bf5 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part9.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part9.q.out @@ -46,6 +46,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -66,6 +67,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part9 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -83,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part9 + PREHOOK: query: from srcpart insert overwrite table nzhang_part9 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/mapjoin_hook.q.out b/ql/src/test/results/clientpositive/mapjoin_hook.q.out index a9f9be3..bd59091 100644 --- a/ql/src/test/results/clientpositive/mapjoin_hook.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_hook.q.out @@ -13,20 +13,24 @@ PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Output: default@dest1 [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 1 HINTED_MAPJOIN_LOCAL: 1 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 -RUN: Stage-6:MAPREDLOCAL +RUN: Stage-7:MAPREDLOCAL RUN: Stage-2:MAPRED RUN: Stage-0:MOVE +RUN: Stage-4:MAPRED RUN: Stage-3:STATS +RUN: Stage-8:COLUMNSTATS PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest1 [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 1 HINTED_MAPJOIN_LOCAL: 1 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 -RUN: Stage-6:MAPREDLOCAL -RUN: Stage-5:MAPRED +RUN: Stage-7:MAPREDLOCAL +RUN: Stage-6:MAPRED RUN: Stage-0:MOVE +RUN: Stage-3:MAPRED RUN: Stage-2:STATS +RUN: Stage-8:COLUMNSTATS PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') @@ -38,14 +42,14 @@ PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest1 -FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask -[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 1 BACKUP_COMMON_JOIN: 1 -RUN: Stage-6:CONDITIONAL -RUN: Stage-7:MAPREDLOCAL -RUN: Stage-1:MAPRED +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 1 CONVERTED_MAPJOIN_LOCAL: 1 BACKUP_COMMON_JOIN: 0 +RUN: Stage-7:CONDITIONAL +RUN: Stage-8:MAPREDLOCAL +RUN: Stage-5:MAPRED RUN: Stage-0:MOVE +RUN: Stage-3:MAPRED RUN: Stage-2:STATS +RUN: Stage-10:COLUMNSTATS PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY @@ -53,14 +57,14 @@ PREHOOK: Input: default@src PREHOOK: Output: default@dest1 FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask -FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask -[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 2 BACKUP_COMMON_JOIN: 2 -RUN: Stage-11:CONDITIONAL -RUN: Stage-14:MAPREDLOCAL +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 1 CONVERTED_MAPJOIN_LOCAL: 2 BACKUP_COMMON_JOIN: 1 +RUN: Stage-12:CONDITIONAL +RUN: Stage-15:MAPREDLOCAL RUN: Stage-1:MAPRED -RUN: Stage-8:CONDITIONAL -RUN: Stage-12:MAPREDLOCAL -RUN: Stage-2:MAPRED +RUN: Stage-9:CONDITIONAL +RUN: Stage-13:MAPREDLOCAL +RUN: Stage-7:MAPRED RUN: Stage-0:MOVE +RUN: Stage-4:MAPRED RUN: Stage-3:STATS +RUN: Stage-17:COLUMNSTATS diff --git a/ql/src/test/results/clientpositive/mapreduce1.q.out b/ql/src/test/results/clientpositive/mapreduce1.q.out index 3d0a156..2660486 100644 --- a/ql/src/test/results/clientpositive/mapreduce1.q.out +++ b/ql/src/test/results/clientpositive/mapreduce1.q.out @@ -26,6 +26,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,6 +95,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/mapreduce2.q.out b/ql/src/test/results/clientpositive/mapreduce2.q.out index 676c387..25469c4 100644 --- a/ql/src/test/results/clientpositive/mapreduce2.q.out +++ b/ql/src/test/results/clientpositive/mapreduce2.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +63,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +92,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/mapreduce3.q.out b/ql/src/test/results/clientpositive/mapreduce3.q.out index fc1a402..0d80be1 100644 --- a/ql/src/test/results/clientpositive/mapreduce3.q.out +++ b/ql/src/test/results/clientpositive/mapreduce3.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +63,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +92,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/mapreduce4.q.out b/ql/src/test/results/clientpositive/mapreduce4.q.out index 17fa029..8d5182a 100644 --- a/ql/src/test/results/clientpositive/mapreduce4.q.out +++ b/ql/src/test/results/clientpositive/mapreduce4.q.out @@ -26,6 +26,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,6 +95,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/mapreduce5.q.out b/ql/src/test/results/clientpositive/mapreduce5.q.out index 21103f8..0bdae3d 100644 --- a/ql/src/test/results/clientpositive/mapreduce5.q.out +++ b/ql/src/test/results/clientpositive/mapreduce5.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -55,6 +57,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -69,6 +86,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key as c1, CAST(src.key / 10 AS INT) as c2, CAST(src.key % 10 AS INT) as c3, src.value as c4 diff --git a/ql/src/test/results/clientpositive/mapreduce6.q.out b/ql/src/test/results/clientpositive/mapreduce6.q.out index fe4e631..df1a9c6 100644 --- a/ql/src/test/results/clientpositive/mapreduce6.q.out +++ b/ql/src/test/results/clientpositive/mapreduce6.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -55,6 +57,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -69,6 +86,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, CAST(src.key / 10 AS INT) as c2, CAST(src.key % 10 AS INT) as c3, src.value diff --git a/ql/src/test/results/clientpositive/mapreduce7.q.out b/ql/src/test/results/clientpositive/mapreduce7.q.out index cc97887..4de831a 100644 --- a/ql/src/test/results/clientpositive/mapreduce7.q.out +++ b/ql/src/test/results/clientpositive/mapreduce7.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +63,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string) + outputColumnNames: k, v, key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k, 16), compute_stats(v, 16), compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +92,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k, v, key, ten, one, value + Column Types: string, string, int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.*, src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/mapreduce8.q.out b/ql/src/test/results/clientpositive/mapreduce8.q.out index b1763c7..7a57081 100644 --- a/ql/src/test/results/clientpositive/mapreduce8.q.out +++ b/ql/src/test/results/clientpositive/mapreduce8.q.out @@ -26,6 +26,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string) + outputColumnNames: k, v, key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k, 16), compute_stats(v, 16), compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,6 +95,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k, v, key, ten, one, value + Column Types: string, string, int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.*, src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/merge1.q.out b/ql/src/test/results/clientpositive/merge1.q.out index 2487bf7..8e38bd8 100644 --- a/ql/src/test/results/clientpositive/merge1.q.out +++ b/ql/src/test/results/clientpositive/merge1.q.out @@ -20,9 +20,11 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,6 +68,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -89,6 +106,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -119,6 +143,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table dest1 select key, count(1) from src group by key PREHOOK: type: QUERY @@ -504,6 +550,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -527,6 +574,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -550,6 +623,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -605,6 +685,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -628,6 +709,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -651,6 +758,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge2.q.out b/ql/src/test/results/clientpositive/merge2.q.out index a8b4bd5..90fe0f8 100644 --- a/ql/src/test/results/clientpositive/merge2.q.out +++ b/ql/src/test/results/clientpositive/merge2.q.out @@ -20,9 +20,11 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,6 +68,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -89,6 +106,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.test1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -119,6 +143,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table test1 select key, count(1) from src group by key PREHOOK: type: QUERY @@ -504,6 +550,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -527,6 +574,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -550,6 +623,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -605,6 +685,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -628,6 +709,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -651,6 +758,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge3.q.out b/ql/src/test/results/clientpositive/merge3.q.out index 1eb5f0e..86b976f 100644 --- a/ql/src/test/results/clientpositive/merge3.q.out +++ b/ql/src/test/results/clientpositive/merge3.q.out @@ -2376,6 +2376,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -2421,6 +2422,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2432,7 +2452,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2479,7 +2499,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2521,6 +2541,40 @@ STAGE PLANS: Truncated Path -> Alias: /merge_src_part/ds=2008-04-08 [merge_src_part] /merge_src_part/ds=2008-04-09 [merge_src_part] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -2562,6 +2616,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_src_part2 + Is Table Level Stats: false + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -4797,6 +4859,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -4814,8 +4877,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - null sort order: - sort order: + key expressions: _col2 (type: string) + null sort order: a + sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE tag: -1 @@ -4832,7 +4896,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4879,7 +4943,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4956,6 +5020,42 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4997,6 +5097,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_src_part2 + Is Table Level Stats: false + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge4.q.out b/ql/src/test/results/clientpositive/merge4.q.out index 182c6a8..c73150d 100644 --- a/ql/src/test/results/clientpositive/merge4.q.out +++ b/ql/src/test/results/clientpositive/merge4.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -41,6 +42,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-08-15' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,6 +102,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1138,6 +1180,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1161,6 +1204,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-08-15' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -1187,6 +1264,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -2763,9 +2847,11 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-2, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -2786,7 +2872,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) TableScan alias: src @@ -2803,7 +2888,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Select Operator @@ -2821,6 +2905,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-08-15' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1 Data size: 353 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1167 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -2847,6 +2947,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -2877,6 +2984,35 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1167 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1143 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1143 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1143 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table nzhang_part partition (ds='2010-08-15', hr) select * from ( select key, value, hr from srcpart where ds='2008-04-08' diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out index a777fe0..a3ec1e1 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out @@ -57,6 +57,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -77,6 +78,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -94,6 +129,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + PREHOOK: query: insert overwrite table merge_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart_merge_dp where ds='2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart_merge_dp @@ -645,6 +687,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -668,6 +711,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -694,6 +771,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1275,6 +1359,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1298,6 +1383,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -1324,6 +1443,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out index 5a2afb0..e68cd55 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out @@ -76,6 +76,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -99,6 +100,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 43 Data size: 8607 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 43 Data size: 8607 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 43 Data size: 8607 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -125,6 +160,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out index 055e07a..5df6d4a 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out @@ -136,6 +136,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -159,6 +160,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -185,6 +220,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out index cbeaf42..4d70e64 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out @@ -137,6 +137,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -160,6 +161,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -186,6 +221,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out index 5a562f4..347bd04 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out @@ -113,6 +113,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -136,6 +137,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -162,6 +197,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/mergejoin.q.out b/ql/src/test/results/clientpositive/mergejoin.q.out index 025de3c..b17ec84 100644 --- a/ql/src/test/results/clientpositive/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/mergejoin.q.out @@ -1659,36 +1659,36 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 242 Data size: 22748 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 242 Data size: 22748 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + alias: c + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 266 Data size: 25022 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1701,34 +1701,34 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 266 Data size: 25022 Basic stats: COMPLETE Column stats: NONE TableScan - alias: c - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 56870 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -2195,36 +2195,36 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 242 Data size: 22748 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 242 Data size: 22748 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + alias: c + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 266 Data size: 25022 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -2237,34 +2237,34 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 266 Data size: 25022 Basic stats: COMPLETE Column stats: NONE TableScan - alias: c - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 56870 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/metadata_only_queries.q.out index 82b4a41..ba5418e 100644 --- a/ql/src/test/results/clientpositive/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/metadata_only_queries.q.out @@ -183,46 +183,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -233,46 +199,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -283,50 +215,12 @@ POSTHOOK: query: explain select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -337,50 +231,12 @@ POSTHOOK: query: explain select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink diff --git a/ql/src/test/results/clientpositive/multi_insert_gby.q.out b/ql/src/test/results/clientpositive/multi_insert_gby.q.out index cb97e58..09fb2ee 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby.q.out @@ -30,8 +30,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -72,6 +76,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 > 500) (type: boolean) Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE @@ -93,6 +112,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -107,6 +141,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -117,9 +187,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE e1 SELECT key, COUNT(*) WHERE key>450 GROUP BY key @@ -211,8 +303,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-3 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -251,6 +347,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 > 450) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -272,6 +383,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -286,6 +412,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Move Operator tables: @@ -296,9 +458,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE e1 SELECT key, COUNT(*) WHERE key>450 GROUP BY key diff --git a/ql/src/test/results/clientpositive/multi_insert_gby2.q.out b/ql/src/test/results/clientpositive/multi_insert_gby2.q.out index 476dfa7..d44080c 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby2.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby2.q.out @@ -33,6 +33,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -92,6 +94,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: count + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(count, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Group By Operator aggregations: percentile_approx(VALUE._col0, 0.5) mode: complete @@ -105,6 +127,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: double) + outputColumnNames: percentile + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(percentile, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -119,6 +161,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: count + Column Types: int + Table: default.e1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: percentile + Column Types: double + Table: default.e2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out index 32aec10..424bc66 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out @@ -41,8 +41,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -103,6 +107,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) @@ -121,6 +140,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: key, keyd, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -135,6 +169,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, keyd, value + Column Types: string, double, string + Table: default.e2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -145,9 +215,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM (select key, cast(key as double) as keyD, value from src order by key) a INSERT OVERWRITE TABLE e2 @@ -167,8 +259,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -229,6 +325,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) @@ -247,6 +358,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: key, keyd, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -261,6 +387,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, keyd, value + Column Types: string, double, string + Table: default.e2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -271,9 +433,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM (select key, cast(key as double) as keyD, value from src order by key) a INSERT OVERWRITE TABLE e1 SELECT key, COUNT(distinct value) group by key @@ -1606,9 +1790,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -1667,6 +1855,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1681,11 +1884,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) @@ -1709,6 +1948,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -1720,9 +1974,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM (select key, cast(key as double) as keyD, value from src order by key) a INSERT OVERWRITE TABLE e1 @@ -1746,10 +2022,16 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-13 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-6 depends on stages: Stage-4 Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-4 Stage-2 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -1797,6 +2079,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Map Reduce @@ -1829,6 +2126,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) @@ -1847,6 +2159,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: key, keyd, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1861,6 +2188,49 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, keyd, value + Column Types: string, double, string + Table: default.e2 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e3 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -1871,9 +2241,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 Move Operator tables: @@ -1884,6 +2276,28 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/multi_insert_gby4.q.out b/ql/src/test/results/clientpositive/multi_insert_gby4.q.out index dd01b74..ed17fac 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby4.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby4.q.out @@ -44,10 +44,16 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9 + Stage-12 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 Stage-2 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -92,6 +98,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 > 500) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -113,6 +134,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 > 490) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -134,6 +170,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -148,6 +199,49 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e3 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -158,9 +252,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 Move Operator tables: @@ -171,9 +287,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 - Stage: Stage-6 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM (SELECT key, value FROM src) a INSERT OVERWRITE TABLE e1 SELECT key, COUNT(*) WHERE key>450 GROUP BY key diff --git a/ql/src/test/results/clientpositive/multi_insert_mixed.q.out b/ql/src/test/results/clientpositive/multi_insert_mixed.q.out index 9acae2e..e9f9093 100644 --- a/ql/src/test/results/clientpositive/multi_insert_mixed.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_mixed.q.out @@ -39,12 +39,16 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-5, Stage-8, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-5, Stage-8, Stage-9, Stage-10 + Stage-13 depends on stages: Stage-5, Stage-8, Stage-9, Stage-10 Stage-6 depends on stages: Stage-3 Stage-7 depends on stages: Stage-6 Stage-1 depends on stages: Stage-7 Stage-8 depends on stages: Stage-1 Stage-2 depends on stages: Stage-3 Stage-9 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -100,6 +104,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -136,6 +155,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -150,6 +189,27 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi3 + Stage: Stage-6 Map Reduce Map Operator Tree: @@ -196,6 +256,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -223,6 +303,28 @@ STAGE PLANS: Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select key, count(1) group by key order by key insert overwrite table src_multi2 select value, count(1) group by value order by value diff --git a/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out b/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out index af0ef54..20c59c2 100644 --- a/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out @@ -29,8 +29,11 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-5, Stage-6 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -54,6 +57,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -69,6 +85,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -86,6 +130,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -99,6 +157,28 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 @@ -166,19 +246,17 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 Stage-6 - Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-2 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-3, Stage-10, Stage-11 + Stage-13 depends on stages: Stage-3, Stage-10, Stage-11 Stage-1 depends on stages: Stage-4 Stage-10 depends on stages: Stage-1 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-11 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -202,6 +280,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -217,6 +308,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -243,6 +362,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -286,44 +419,27 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-11 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -390,11 +506,19 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-2, Stage-7, Stage-6, Stage-9 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-3, Stage-5, Stage-11 + Stage-13 depends on stages: Stage-3, Stage-5, Stage-11 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-2 , consists of Stage-7, Stage-6, Stage-8 + Stage-7 + Stage-6 + Stage-8 + Stage-9 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -418,6 +542,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -433,6 +570,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -450,6 +615,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -463,6 +642,67 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-10 + Conditional Operator + + Stage: Stage-7 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-9 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 @@ -533,6 +773,8 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-17 depends on stages: Stage-3, Stage-10, Stage-16 + Stage-18 depends on stages: Stage-3, Stage-10, Stage-16 Stage-1 depends on stages: Stage-4 Stage-10 depends on stages: Stage-1 Stage-5 @@ -543,6 +785,7 @@ STAGE DEPENDENCIES: Stage-11 Stage-13 Stage-14 depends on stages: Stage-13 + Stage-16 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -566,6 +809,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -581,6 +837,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -607,6 +891,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-17 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-18 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -689,6 +987,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-16 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 @@ -757,8 +1077,12 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-6, Stage-5, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-6, Stage-5, Stage-7 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -794,6 +1118,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -810,6 +1149,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Dependency Collection @@ -827,6 +1181,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -837,9 +1205,53 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value @@ -900,19 +1312,23 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 Stage-6 - Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-13, Stage-12, Stage-15 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-18 depends on stages: Stage-3, Stage-11, Stage-10, Stage-17 + Stage-19 depends on stages: Stage-3, Stage-11, Stage-10, Stage-17 Stage-1 depends on stages: Stage-4 - Stage-10 depends on stages: Stage-1 + Stage-11 depends on stages: Stage-1 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-11 + Stage-10 depends on stages: Stage-2 + Stage-16 depends on stages: Stage-2 , consists of Stage-13, Stage-12, Stage-14 Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-12 + Stage-14 + Stage-15 depends on stages: Stage-14 + Stage-17 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -948,6 +1364,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -964,6 +1395,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-9 Conditional Operator @@ -990,6 +1436,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-18 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-19 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1000,7 +1460,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-10 + Stage: Stage-11 Stats-Aggr Operator Stage: Stage-5 @@ -1033,16 +1493,38 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-16 Conditional Operator - Stage: Stage-12 + Stage: Stage-13 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-11 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -1054,7 +1536,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-13 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -1066,12 +1548,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-14 + Stage: Stage-15 Move Operator files: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-17 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value @@ -1133,8 +1637,12 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-6, Stage-5, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-6, Stage-5, Stage-7 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1170,6 +1678,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -1186,6 +1709,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Dependency Collection @@ -1203,6 +1741,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1213,9 +1765,53 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value @@ -1276,19 +1872,23 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 Stage-6 - Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-13, Stage-12, Stage-15 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-18 depends on stages: Stage-3, Stage-11, Stage-10, Stage-17 + Stage-19 depends on stages: Stage-3, Stage-11, Stage-10, Stage-17 Stage-1 depends on stages: Stage-4 - Stage-10 depends on stages: Stage-1 + Stage-11 depends on stages: Stage-1 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-11 + Stage-10 depends on stages: Stage-2 + Stage-16 depends on stages: Stage-2 , consists of Stage-13, Stage-12, Stage-14 Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-12 + Stage-14 + Stage-15 depends on stages: Stage-14 + Stage-17 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1324,6 +1924,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -1340,6 +1955,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-9 Conditional Operator @@ -1366,6 +1996,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-18 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-19 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1376,7 +2020,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-10 + Stage: Stage-11 Stats-Aggr Operator Stage: Stage-5 @@ -1409,16 +2053,38 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-16 Conditional Operator - Stage: Stage-12 + Stage: Stage-13 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-11 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -1430,7 +2096,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-13 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -1442,12 +2108,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-14 + Stage: Stage-15 Move Operator files: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-17 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value @@ -1509,8 +2197,11 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-3, Stage-5, Stage-6 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1536,6 +2227,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1547,6 +2251,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1567,6 +2286,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1578,6 +2310,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -1595,6 +2355,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1608,6 +2382,28 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 @@ -1694,19 +2490,17 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 Stage-6 - Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-2 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-13 depends on stages: Stage-3, Stage-10, Stage-11 + Stage-14 depends on stages: Stage-3, Stage-10, Stage-11 Stage-1 depends on stages: Stage-4 Stage-10 depends on stages: Stage-1 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-11 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1732,6 +2526,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1743,6 +2550,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1763,6 +2585,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1774,6 +2609,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -1800,6 +2663,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1843,44 +2720,27 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-11 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1966,11 +2826,19 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-2, Stage-7, Stage-6, Stage-9 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-13 depends on stages: Stage-3, Stage-5, Stage-11 + Stage-14 depends on stages: Stage-3, Stage-5, Stage-11 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-2 , consists of Stage-7, Stage-6, Stage-8 + Stage-7 + Stage-6 + Stage-8 + Stage-9 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1996,6 +2864,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2007,6 +2888,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2027,6 +2923,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2038,6 +2947,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -2055,6 +2992,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -2068,39 +3019,100 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator -PREHOOK: query: from (select * from src union all select * from src) s -insert overwrite table src_multi1 select * where key < 10 -insert overwrite table src_multi2 select * where key > 10 and key < 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@src_multi1 -PREHOOK: Output: default@src_multi2 -POSTHOOK: query: from (select * from src union all select * from src) s -insert overwrite table src_multi1 select * where key < 10 -insert overwrite table src_multi2 select * where key > 10 and key < 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@src_multi1 -POSTHOOK: Output: default@src_multi2 -POSTHOOK: Lineage: src_multi1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_multi1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_multi2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select * from src_multi1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_multi1 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_multi1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_multi1 + Stage: Stage-10 + Conditional Operator + + Stage: Stage-7 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -2 val_2 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-9 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: from (select * from src union all select * from src) s +insert overwrite table src_multi1 select * where key < 10 +insert overwrite table src_multi2 select * where key > 10 and key < 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_multi1 +PREHOOK: Output: default@src_multi2 +POSTHOOK: query: from (select * from src union all select * from src) s +insert overwrite table src_multi1 select * where key < 10 +insert overwrite table src_multi2 select * where key > 10 and key < 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_multi1 +POSTHOOK: Output: default@src_multi2 +POSTHOOK: Lineage: src_multi1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from src_multi1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_multi1 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_multi1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_multi1 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +2 val_2 2 val_2 4 val_4 4 val_4 @@ -2157,6 +3169,8 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-18 depends on stages: Stage-3, Stage-10, Stage-16 + Stage-19 depends on stages: Stage-3, Stage-10, Stage-16 Stage-1 depends on stages: Stage-4 Stage-10 depends on stages: Stage-1 Stage-5 @@ -2167,6 +3181,7 @@ STAGE DEPENDENCIES: Stage-11 Stage-13 Stage-14 depends on stages: Stage-13 + Stage-16 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -2192,6 +3207,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2203,6 +3231,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2223,6 +3266,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2234,6 +3290,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -2260,6 +3344,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-18 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-19 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -2342,6 +3440,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-16 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 @@ -3273,14 +4393,18 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4, Stage-8 + Stage-6 depends on stages: Stage-4, Stage-9, Stage-10 Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-5, Stage-7, Stage-8, Stage-2, Stage-3 + Stage-12 depends on stages: Stage-5, Stage-7, Stage-8, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 Stage-7 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-4 Stage-8 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-9 + Stage-10 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-4 @@ -3304,6 +4428,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3319,46 +4456,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-6 Dependency Collection @@ -3376,6 +4510,20 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -3389,13 +4537,77 @@ STAGE PLANS: Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-2 Move Operator files: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-8 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan @@ -3497,14 +4709,23 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4, Stage-8 + Stage-6 depends on stages: Stage-4, Stage-9, Stage-8, Stage-11, Stage-14, Stage-15 Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 + Stage-16 depends on stages: Stage-5, Stage-7, Stage-13, Stage-2, Stage-3 + Stage-17 depends on stages: Stage-5, Stage-7, Stage-13, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 Stage-7 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-4 - Stage-8 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-8 + Stage-12 depends on stages: Stage-4 , consists of Stage-9, Stage-8, Stage-10 + Stage-9 + Stage-8 + Stage-10 + Stage-11 depends on stages: Stage-10 + Stage-13 depends on stages: Stage-4 + Stage-14 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-14 + Stage-15 depends on stages: Stage-14 + Stage-3 depends on stages: Stage-15 STAGE PLANS: Stage: Stage-4 @@ -3528,6 +4749,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3543,46 +4777,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-6 Dependency Collection @@ -3600,6 +4831,20 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-16 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-17 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -3613,22 +4858,125 @@ STAGE PLANS: Stage: Stage-7 Stats-Aggr Operator - Stage: Stage-2 + Stage: Stage-12 + Conditional Operator + + Stage: Stage-9 Move Operator files: - hdfs directory: false + hdfs directory: true #### A masked pattern was here #### Stage: Stage-8 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-11 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-13 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-14 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + + Stage: Stage-15 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -3723,22 +5071,21 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-11 depends on stages: Stage-4 , consists of Stage-8, Stage-7, Stage-9 Stage-8 - Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-4, Stage-18 + Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-4, Stage-14, Stage-15 Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 + Stage-16 depends on stages: Stage-5, Stage-12, Stage-13, Stage-2, Stage-3 + Stage-17 depends on stages: Stage-5, Stage-12, Stage-13, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 Stage-12 depends on stages: Stage-1 Stage-7 Stage-9 Stage-10 depends on stages: Stage-9 - Stage-17 depends on stages: Stage-4 , consists of Stage-14, Stage-13, Stage-15 - Stage-14 - Stage-13 - Stage-15 - Stage-16 depends on stages: Stage-15 - Stage-2 depends on stages: Stage-4 - Stage-18 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-18 + Stage-13 depends on stages: Stage-4 + Stage-14 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-14 + Stage-15 depends on stages: Stage-14 + Stage-3 depends on stages: Stage-15 STAGE PLANS: Stage: Stage-4 @@ -3762,6 +5109,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3777,46 +5137,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-11 Conditional Operator @@ -3843,6 +5200,20 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-16 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-17 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -3886,44 +5257,69 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-17 - Conditional Operator - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-13 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-15 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-16 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -3931,7 +5327,7 @@ STAGE PLANS: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-18 + Stage: Stage-15 Map Reduce Map Operator Tree: TableScan @@ -4035,9 +5431,11 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-11 depends on stages: Stage-4 , consists of Stage-8, Stage-7, Stage-9 Stage-8 - Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-4, Stage-18 + Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-19, Stage-20 Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 + Stage-21 depends on stages: Stage-5, Stage-12, Stage-18, Stage-2, Stage-3 + Stage-22 depends on stages: Stage-5, Stage-12, Stage-18, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 Stage-12 depends on stages: Stage-1 Stage-7 @@ -4048,9 +5446,11 @@ STAGE DEPENDENCIES: Stage-13 Stage-15 Stage-16 depends on stages: Stage-15 - Stage-2 depends on stages: Stage-4 Stage-18 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-18 + Stage-19 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-19 + Stage-20 depends on stages: Stage-19 + Stage-3 depends on stages: Stage-20 STAGE PLANS: Stage: Stage-4 @@ -4074,6 +5474,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -4089,46 +5502,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-11 Conditional Operator @@ -4155,6 +5565,20 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-21 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-22 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -4237,13 +5661,77 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-18 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-19 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-2 Move Operator files: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-18 + Stage: Stage-20 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/multi_insert_union_src.q.out b/ql/src/test/results/clientpositive/multi_insert_union_src.q.out index 1ff1db5..28dd199 100644 --- a/ql/src/test/results/clientpositive/multi_insert_union_src.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_union_src.q.out @@ -52,6 +52,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-5 + Stage-8 depends on stages: Stage-3, Stage-5 Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 @@ -131,6 +133,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 58 Data size: 608 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -145,6 +167,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -167,6 +203,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 58 Data size: 608 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator diff --git a/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out b/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out index 5f69cc5..316c3b3 100644 --- a/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out @@ -67,6 +67,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -118,6 +120,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -132,6 +149,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain FROM T_A a LEFT JOIN T_B b ON a.id = b.id @@ -149,6 +195,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -200,6 +248,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -214,6 +277,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM T_A a LEFT JOIN T_B b ON a.id = b.id INSERT OVERWRITE TABLE join_result_1 @@ -236,8 +328,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-3 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -285,6 +381,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -300,6 +411,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -314,6 +440,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Move Operator tables: @@ -324,9 +486,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM T_A a LEFT JOIN T_B b ON a.id = b.id INSERT OVERWRITE TABLE join_result_1 @@ -349,8 +533,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -398,6 +586,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -413,6 +616,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -427,6 +645,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -437,9 +691,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM T_A a JOIN T_B b ON a.id = b.id INSERT OVERWRITE TABLE join_result_1 @@ -462,8 +738,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -517,6 +797,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -532,6 +827,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -546,6 +856,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -556,9 +902,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM T_A a JOIN T_B b ON a.id = b.id INSERT OVERWRITE TABLE join_result_1 @@ -581,8 +949,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -636,6 +1008,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -651,6 +1038,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -665,6 +1067,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -675,9 +1113,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM T_A a JOIN T_B b ON a.id = b.id INSERT OVERWRITE TABLE join_result_1 @@ -700,8 +1160,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -767,6 +1231,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -782,6 +1261,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -796,6 +1290,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -806,9 +1336,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM T_A a JOIN T_B b ON a.id = b.id INSERT OVERWRITE TABLE join_result_1 @@ -831,8 +1383,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -898,6 +1454,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean) Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -913,6 +1484,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: ida, vala, idb, valb + Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -927,6 +1513,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: ida, vala, idb, valb + Column Types: string, string, string, string + Table: default.join_result_3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -937,6 +1559,28 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.join_result_3 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/multigroupby_singlemr.q.out b/ql/src/test/results/clientpositive/multigroupby_singlemr.q.out index 7af8c43..16e3814 100644 --- a/ql/src/test/results/clientpositive/multigroupby_singlemr.q.out +++ b/ql/src/test/results/clientpositive/multigroupby_singlemr.q.out @@ -52,9 +52,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -114,6 +118,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -128,11 +147,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: d1, d2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) @@ -157,6 +212,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -168,9 +238,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: EXPLAIN FROM TBL INSERT OVERWRITE TABLE DEST1 SELECT TBL.C1, COUNT(TBL.C2) GROUP BY TBL.C1 @@ -185,9 +277,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -247,6 +343,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -261,11 +372,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: d1, d2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) @@ -290,6 +437,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -301,9 +463,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: EXPLAIN FROM TBL INSERT OVERWRITE TABLE DEST3 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP BY TBL.C1, TBL.C2, TBL.C3 @@ -318,9 +502,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -380,6 +568,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -394,11 +597,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) @@ -423,6 +662,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -434,9 +688,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: EXPLAIN FROM TBL INSERT OVERWRITE TABLE DEST3 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP BY TBL.C1, TBL.C2, TBL.C3 @@ -451,8 +727,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -492,6 +772,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col2 (type: int), KEY._col1 (type: int) @@ -510,6 +805,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest4 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -524,6 +834,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest4 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -534,9 +880,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest4 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: EXPLAIN FROM TBL INSERT OVERWRITE TABLE DEST3 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP BY TBL.C1, TBL.C2, TBL.C3 @@ -553,12 +921,18 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8, Stage-10, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8, Stage-10, Stage-11 + Stage-14 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8, Stage-10, Stage-11 Stage-5 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-1 - Stage-7 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-7 - Stage-8 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-6 + Stage-9 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-9 + Stage-10 depends on stages: Stage-2 + Stage-11 depends on stages: Stage-9 STAGE PLANS: Stage: Stage-3 @@ -634,6 +1008,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -648,11 +1037,54 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: d1, d2 + Column Types: int, int + Table: default.dest1 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) @@ -677,6 +1109,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -688,10 +1135,32 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator - Stage: Stage-7 + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -720,6 +1189,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -731,6 +1215,28 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-8 + Stage: Stage-10 Stats-Aggr Operator + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out b/ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out index d01461b..4265eb9 100644 --- a/ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out +++ b/ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -36,7 +38,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -58,6 +59,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, as + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(as, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -72,6 +88,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, as + Column Types: int, string + Table: default.insert + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE `insert` SELECT * FROM src LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -103,6 +148,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -121,7 +168,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -143,6 +189,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, as + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(as, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -157,6 +218,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, as + Column Types: int, string + Table: default.insert + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE `insert` SELECT * FROM src LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -197,6 +287,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -215,7 +307,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -237,6 +328,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, as + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(as, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -251,6 +357,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, as + Column Types: int, string + Table: default.insert + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE `insert` SELECT * FROM src LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/notable_alias1.q.out b/ql/src/test/results/clientpositive/notable_alias1.q.out index 200b31c..3e44959 100644 --- a/ql/src/test/results/clientpositive/notable_alias1.q.out +++ b/ql/src/test/results/clientpositive/notable_alias1.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: dummy, key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(dummy, 16), compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,6 +95,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: dummy, key, value + Column Types: string, int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT '1234', key, count(1) WHERE src.key < 100 group by key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/notable_alias2.q.out b/ql/src/test/results/clientpositive/notable_alias2.q.out index 4df8073..49a2b95 100644 --- a/ql/src/test/results/clientpositive/notable_alias2.q.out +++ b/ql/src/test/results/clientpositive/notable_alias2.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: dummy, key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(dummy, 16), compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,6 +95,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: dummy, key, value + Column Types: string, int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT '1234', src.key, count(1) WHERE key < 100 group by src.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/orc_merge1.q.out b/ql/src/test/results/clientpositive/orc_merge1.q.out index a83e85b..939bf2a 100644 --- a/ql/src/test/results/clientpositive/orc_merge1.q.out +++ b/ql/src/test/results/clientpositive/orc_merge1.q.out @@ -54,6 +54,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part FROM src @@ -126,6 +168,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -149,6 +192,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -175,6 +252,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -240,6 +324,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -263,6 +348,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -289,6 +408,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge10.q.out b/ql/src/test/results/clientpositive/orc_merge10.q.out index 607aaeb..468b03d 100644 --- a/ql/src/test/results/clientpositive/orc_merge10.q.out +++ b/ql/src/test/results/clientpositive/orc_merge10.q.out @@ -54,6 +54,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part FROM src @@ -126,6 +168,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -149,6 +192,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -175,6 +252,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -240,6 +324,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -263,6 +348,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -289,6 +408,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge2.q.out b/ql/src/test/results/clientpositive/orc_merge2.q.out index d4c474f..29b3373 100644 --- a/ql/src/test/results/clientpositive/orc_merge2.q.out +++ b/ql/src/test/results/clientpositive/orc_merge2.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -53,6 +54,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge2a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -80,6 +115,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge2a + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge3.q.out b/ql/src/test/results/clientpositive/orc_merge3.q.out index 7bf12c6..416cb3c 100644 --- a/ql/src/test/results/clientpositive/orc_merge3.q.out +++ b/ql/src/test/results/clientpositive/orc_merge3.q.out @@ -60,6 +60,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -83,6 +84,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -106,6 +133,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge4.q.out b/ql/src/test/results/clientpositive/orc_merge4.q.out index 828f204..7c85443 100644 --- a/ql/src/test/results/clientpositive/orc_merge4.q.out +++ b/ql/src/test/results/clientpositive/orc_merge4.q.out @@ -78,6 +78,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -101,6 +102,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 94000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -124,6 +151,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge5.q.out b/ql/src/test/results/clientpositive/orc_merge5.q.out index e845186..0d965df 100644 --- a/ql/src/test/results/clientpositive/orc_merge5.q.out +++ b/ql/src/test/results/clientpositive/orc_merge5.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -54,6 +55,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -68,6 +95,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -112,6 +146,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -139,6 +174,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -162,6 +223,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge6.q.out b/ql/src/test/results/clientpositive/orc_merge6.q.out index 5ece361..5de3c47 100644 --- a/ql/src/test/results/clientpositive/orc_merge6.q.out +++ b/ql/src/test/results/clientpositive/orc_merge6.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -54,6 +55,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -71,6 +106,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -157,6 +199,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -184,6 +227,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -210,6 +287,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out index a83e85b..939bf2a 100644 --- a/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out +++ b/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out @@ -54,6 +54,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part FROM src @@ -126,6 +168,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -149,6 +192,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -175,6 +252,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -240,6 +324,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -263,6 +348,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -289,6 +408,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge_incompat1.q.out b/ql/src/test/results/clientpositive/orc_merge_incompat1.q.out index fcf1c68..762acd0 100644 --- a/ql/src/test/results/clientpositive/orc_merge_incompat1.q.out +++ b/ql/src/test/results/clientpositive/orc_merge_incompat1.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -53,6 +54,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,6 +94,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 diff --git a/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out b/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out index a27041f..6388dfc 100644 --- a/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out +++ b/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -50,6 +51,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -66,6 +101,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 diff --git a/ql/src/test/results/clientpositive/parallel.q.out b/ql/src/test/results/clientpositive/parallel.q.out index 459105e..c851131 100644 --- a/ql/src/test/results/clientpositive/parallel.q.out +++ b/ql/src/test/results/clientpositive/parallel.q.out @@ -29,8 +29,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -91,6 +95,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_a + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete @@ -104,6 +123,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_b + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -118,6 +152,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_a + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_b + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -128,9 +198,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_b - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from (select key, value from src group by key, value) s insert overwrite table src_a select s.key, s.value group by s.key, s.value insert overwrite table src_b select s.key, s.value group by s.key, s.value diff --git a/ql/src/test/results/clientpositive/parallel_join1.q.out b/ql/src/test/results/clientpositive/parallel_join1.q.out index 8843661..43480e1 100644 --- a/ql/src/test/results/clientpositive/parallel_join1.q.out +++ b/ql/src/test/results/clientpositive/parallel_join1.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +77,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +106,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/parquet_analyze.q.out b/ql/src/test/results/clientpositive/parquet_analyze.q.out index d3cdc3f..8627b18 100644 --- a/ql/src/test/results/clientpositive/parquet_analyze.q.out +++ b/ql/src/test/results/clientpositive/parquet_analyze.q.out @@ -90,7 +90,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\",\"state\":\"true\"}} numFiles 1 numRows 100 rawDataSize 700 @@ -138,7 +138,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\",\"state\":\"true\"}} numFiles 1 numRows 100 rawDataSize 5952 diff --git a/ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out b/ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out index b9a3664..6fd3357 100644 --- a/ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out +++ b/ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out @@ -56,7 +56,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 parquet.mr.int96.write.zone UTC @@ -125,7 +125,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 parquet.mr.int96.write.zone PST @@ -194,7 +194,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 rawDataSize 1 @@ -262,7 +262,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 parquet.mr.int96.write.zone CST @@ -331,7 +331,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 parquet.mr.int96.write.zone PST diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out index a1301fd..14fb140 100644 --- a/ql/src/test/results/clientpositive/pcr.q.out +++ b/ql/src/test/results/clientpositive/pcr.q.out @@ -91,7 +91,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -138,7 +138,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -280,7 +280,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -327,7 +327,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -374,7 +374,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -554,7 +554,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -601,7 +601,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -745,7 +745,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -792,7 +792,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -938,7 +938,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -985,7 +985,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1032,7 +1032,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1189,7 +1189,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1236,7 +1236,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1283,7 +1283,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1447,7 +1447,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1494,7 +1494,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1621,7 +1621,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1668,7 +1668,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1835,7 +1835,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1882,7 +1882,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1929,7 +1929,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2123,7 +2123,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2170,7 +2170,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2326,7 +2326,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2585,7 +2585,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2632,7 +2632,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2880,7 +2880,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2927,7 +2927,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2974,7 +2974,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3021,7 +3021,7 @@ STAGE PLANS: partition values: ds 2000-04-11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3200,7 +3200,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3247,7 +3247,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3294,7 +3294,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3459,16 +3459,14 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-3, Stage-9, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-1 depends on stages: Stage-2 Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -3514,6 +3512,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 @@ -3550,6 +3564,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3561,7 +3602,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3602,6 +3643,35 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [pcr_t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -3644,6 +3714,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t2 + Is Table Level Stats: true + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t3 + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -3824,15 +3910,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -3870,235 +3947,122 @@ STAGE PLANS: Map Operator Tree: TableScan GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: #### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 - Truncated Path -> Alias: + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false - Stage: Stage-12 +PREHOOK: query: from pcr_t1 +insert overwrite table pcr_t2 select key, value where ds='2000-04-08' +insert overwrite table pcr_t3 select key, value where ds='2000-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: Input: default@pcr_t1@ds=2000-04-08 +PREHOOK: Output: default@pcr_t2 +PREHOOK: Output: default@pcr_t3 +POSTHOOK: query: from pcr_t1 +insert overwrite table pcr_t2 select key, value where ds='2000-04-08' +insert overwrite table pcr_t3 select key, value where ds='2000-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Output: default@pcr_t2 +POSTHOOK: Output: default@pcr_t3 +POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: pcr_t3.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: pcr_t3.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: explain extended +from pcr_t1 +insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 +insert overwrite table pcr_t3 select key, value where ds='2000-04-08' and key=3 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +from pcr_t1 +insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 +insert overwrite table pcr_t3 select key, value where ds='2000-04-08' and key=3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 + Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: from pcr_t1 -insert overwrite table pcr_t2 select key, value where ds='2000-04-08' -insert overwrite table pcr_t3 select key, value where ds='2000-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@pcr_t1 -PREHOOK: Input: default@pcr_t1@ds=2000-04-08 -PREHOOK: Output: default@pcr_t2 -PREHOOK: Output: default@pcr_t3 -POSTHOOK: query: from pcr_t1 -insert overwrite table pcr_t2 select key, value where ds='2000-04-08' -insert overwrite table pcr_t3 select key, value where ds='2000-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@pcr_t1 -POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 -POSTHOOK: Output: default@pcr_t2 -POSTHOOK: Output: default@pcr_t3 -POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: pcr_t3.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: pcr_t3.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: explain extended -from pcr_t1 -insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 -insert overwrite table pcr_t3 select key, value where ds='2000-04-08' and key=3 -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -from pcr_t1 -insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 -insert overwrite table pcr_t3 select key, value where ds='2000-04-08' and key=3 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 - Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 - -STAGE PLANS: - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: pcr_t1 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + alias: pcr_t1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false @@ -4119,7 +4083,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4140,6 +4104,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: 2 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: (key = 3) (type: boolean) @@ -4159,7 +4139,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4180,6 +4160,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: 3 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4191,7 +4198,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4232,6 +4239,35 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [pcr_t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -4251,7 +4287,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4274,6 +4310,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t2 + Is Table Level Stats: true + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t3 + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -4288,7 +4340,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4318,7 +4370,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4339,7 +4391,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4375,7 +4427,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4405,7 +4457,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4426,7 +4478,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4454,15 +4506,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -4472,7 +4515,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4500,180 +4543,69 @@ STAGE PLANS: Map Operator Tree: TableScan GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 - Truncated Path -> Alias: + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: from pcr_t1 insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out index 3438c74..114e77f 100644 --- a/ql/src/test/results/clientpositive/pointlookup2.q.out +++ b/ql/src/test/results/clientpositive/pointlookup2.q.out @@ -136,7 +136,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -183,7 +183,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -334,7 +334,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -553,7 +553,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -600,7 +600,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -813,7 +813,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -860,7 +860,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -905,7 +905,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value @@ -926,7 +926,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value @@ -1128,7 +1128,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1175,7 +1175,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1222,7 +1222,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1267,7 +1267,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value @@ -1288,7 +1288,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out b/ql/src/test/results/clientpositive/pointlookup3.q.out index 2c3e39f..3ad7d38 100644 --- a/ql/src/test/results/clientpositive/pointlookup3.q.out +++ b/ql/src/test/results/clientpositive/pointlookup3.q.out @@ -91,7 +91,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -139,7 +139,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -267,7 +267,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -418,7 +418,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -638,7 +638,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -686,7 +686,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -900,7 +900,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -948,7 +948,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -996,7 +996,7 @@ STAGE PLANS: ds1 2000-04-10 ds2 2001-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/pointlookup4.q.out b/ql/src/test/results/clientpositive/pointlookup4.q.out index 8ef5551..3c9cc60 100644 --- a/ql/src/test/results/clientpositive/pointlookup4.q.out +++ b/ql/src/test/results/clientpositive/pointlookup4.q.out @@ -91,7 +91,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -139,7 +139,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -267,7 +267,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -315,7 +315,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/ppd_constant_expr.q.out b/ql/src/test/results/clientpositive/ppd_constant_expr.q.out index cbe7654..a05243b 100644 --- a/ql/src/test/results/clientpositive/ppd_constant_expr.q.out +++ b/ql/src/test/results/clientpositive/ppd_constant_expr.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ppd_constant_expr + Select Operator + expressions: _col0 (type: double), _col1 (type: int), _col2 (type: double) + outputColumnNames: c1, c2, c3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3 + Column Types: string, int, double + Table: default.ppd_constant_expr + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -156,6 +190,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -179,6 +214,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ppd_constant_expr + Select Operator + expressions: _col0 (type: double), _col1 (type: int), _col2 (type: double) + outputColumnNames: c1, c2, c3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -202,6 +263,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3 + Column Types: string, int, double + Table: default.ppd_constant_expr + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/ppd_join5.q.out b/ql/src/test/results/clientpositive/ppd_join5.q.out index 551769c..5f143e1 100644 --- a/ql/src/test/results/clientpositive/ppd_join5.q.out +++ b/ql/src/test/results/clientpositive/ppd_join5.q.out @@ -32,7 +32,7 @@ POSTHOOK: Lineage: t1.id1 SIMPLE [] POSTHOOK: Lineage: t1.id2 SIMPLE [] POSTHOOK: Lineage: t2.d SIMPLE [] POSTHOOK: Lineage: t2.id SIMPLE [] -Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select a.*,b.d d1,c.d d2 from t1 a join t2 b on (a.id1 = b.id) @@ -63,35 +63,32 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan - alias: b + alias: c Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d <= 1) and id is not null) (type: boolean) + predicate: (d <= 1) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: id (type: string), d (type: int) - outputColumnNames: _col0, _col1 + expressions: d (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col0 (type: string) + sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -104,34 +101,38 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int) + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) TableScan - alias: c + alias: b Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d <= 1) (type: boolean) + predicate: ((d <= 1) and id is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d (type: int) - outputColumnNames: _col0 + expressions: id (type: string), d (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col0 (type: string) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col3, _col4 + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -148,7 +149,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from ( select a.*,b.d d1,c.d d2 from @@ -183,35 +184,32 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan - alias: b + alias: c Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d <= 1) and id is not null) (type: boolean) + predicate: (d <= 1) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: id (type: string), d (type: int) - outputColumnNames: _col0, _col1 + expressions: d (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col0 (type: string) + sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -224,46 +222,54 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int) + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) TableScan - alias: c + alias: b Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d <= 1) (type: boolean) + predicate: ((d <= 1) and id is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d (type: int) - outputColumnNames: _col0 + expressions: id (type: string), d (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col0 (type: string) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col3, _col4 + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col3 > 1) or (_col4 > 1)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 + Filter Operator + predicate: ((_col3 > 1) or (_col4 > 1)) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -271,7 +277,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from ( select a.*,b.d d1,c.d d2 from t1 a join t2 b on (a.id1 = b.id) diff --git a/ql/src/test/results/clientpositive/ppd_multi_insert.q.out b/ql/src/test/results/clientpositive/ppd_multi_insert.q.out index 7e501c7..baf1f45 100644 --- a/ql/src/test/results/clientpositive/ppd_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/ppd_multi_insert.q.out @@ -48,10 +48,16 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-13 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-14 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-6 depends on stages: Stage-4 Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-4 Stage-2 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-4 Stage-3 depends on stages: Stage-4 STAGE PLANS: @@ -105,6 +111,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -120,6 +141,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -135,6 +171,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (_col0 >= 300) (type: boolean) Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE @@ -163,6 +215,49 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi2 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.mi3 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -173,9 +268,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 Move Operator tables: @@ -189,9 +306,38 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-3 Move Operator files: @@ -1313,10 +1459,16 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-13 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-14 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-6 depends on stages: Stage-4 Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-4 Stage-2 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-4 Stage-3 depends on stages: Stage-4 STAGE PLANS: @@ -1370,6 +1522,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -1385,6 +1552,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -1400,6 +1582,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (_col0 >= 300) (type: boolean) Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE @@ -1428,6 +1626,49 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi2 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.mi3 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -1438,9 +1679,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 Move Operator tables: @@ -1454,9 +1717,38 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-3 Move Operator files: diff --git a/ql/src/test/results/clientpositive/push_or.q.out b/ql/src/test/results/clientpositive/push_or.q.out index dacdc40..913fc67 100644 --- a/ql/src/test/results/clientpositive/push_or.q.out +++ b/ql/src/test/results/clientpositive/push_or.q.out @@ -73,7 +73,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -120,7 +120,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/quote1.q.out b/ql/src/test/results/clientpositive/quote1.q.out index f8592c4..f4fdc3f 100644 --- a/ql/src/test/results/clientpositive/quote1.q.out +++ b/ql/src/test/results/clientpositive/quote1.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string) + outputColumnNames: location, type, table + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(location, 16), compute_stats(type, 16) + keys: table (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -71,6 +106,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: location, type + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out b/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out index fa4aae8..2a859d0 100644 --- a/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out +++ b/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -72,6 +73,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -174,6 +191,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [a] /srcpart/ds=2008-04-08/hr=12 [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -216,6 +262,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: string, string, string, string + Table: default.tmptable + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/rcfile_default_format.q.out b/ql/src/test/results/clientpositive/rcfile_default_format.q.out index bb846c0..afa0797 100644 --- a/ql/src/test/results/clientpositive/rcfile_default_format.q.out +++ b/ql/src/test/results/clientpositive/rcfile_default_format.q.out @@ -121,7 +121,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} numFiles 1 numRows 500 rawDataSize 1406 diff --git a/ql/src/test/results/clientpositive/rcfile_null_value.q.out b/ql/src/test/results/clientpositive/rcfile_null_value.q.out index a66655b..228c802 100644 --- a/ql/src/test/results/clientpositive/rcfile_null_value.q.out +++ b/ql/src/test/results/clientpositive/rcfile_null_value.q.out @@ -91,6 +91,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -149,6 +151,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.dest1_rc + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -163,6 +180,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1_rc + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/sample1.q.out b/ql/src/test/results/clientpositive/sample1.q.out index 1947e92..d2d85b8 100644 --- a/ql/src/test/results/clientpositive/sample1.q.out +++ b/ql/src/test/results/clientpositive/sample1.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -74,6 +75,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, dt, hr + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(dt, 16), compute_stats(hr, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -127,6 +144,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -169,6 +215,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, dt, hr + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sample2.q.out b/ql/src/test/results/clientpositive/sample2.q.out index 9c5e238..ada73a5 100644 --- a/ql/src/test/results/clientpositive/sample2.q.out +++ b/ql/src/test/results/clientpositive/sample2.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -73,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -126,6 +143,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -168,6 +214,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sample4.q.out b/ql/src/test/results/clientpositive/sample4.q.out index 20bd86c..61bb4d6 100644 --- a/ql/src/test/results/clientpositive/sample4.q.out +++ b/ql/src/test/results/clientpositive/sample4.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -73,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -126,6 +143,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -168,6 +214,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sample5.q.out b/ql/src/test/results/clientpositive/sample5.q.out index 0ed1fb2..178b1bb 100644 --- a/ql/src/test/results/clientpositive/sample5.q.out +++ b/ql/src/test/results/clientpositive/sample5.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -74,6 +75,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -127,6 +144,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -169,6 +215,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sample6.q.out b/ql/src/test/results/clientpositive/sample6.q.out index 1988a90..e7593de 100644 --- a/ql/src/test/results/clientpositive/sample6.q.out +++ b/ql/src/test/results/clientpositive/sample6.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -73,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -126,6 +143,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -168,6 +214,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sample7.q.out b/ql/src/test/results/clientpositive/sample7.q.out index 7de68a8..bed69e0 100644 --- a/ql/src/test/results/clientpositive/sample7.q.out +++ b/ql/src/test/results/clientpositive/sample7.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -74,6 +75,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -127,6 +144,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -169,6 +215,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/skewjoin.q.out b/ql/src/test/results/clientpositive/skewjoin.q.out index 477abb9..5eed010 100644 --- a/ql/src/test/results/clientpositive/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/skewjoin.q.out @@ -80,11 +80,13 @@ INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-5 depends on stages: Stage-1 , consists of Stage-6, Stage-0 - Stage-6 - Stage-4 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-1 , consists of Stage-7, Stage-0, Stage-3 + Stage-7 + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -143,11 +145,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-6 Conditional Operator - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: 1 @@ -161,7 +178,7 @@ STAGE PLANS: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -184,6 +201,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -200,6 +232,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out index b53e670..304b3e5 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out @@ -50,6 +50,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -93,7 +95,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -172,6 +174,32 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -205,6 +233,86 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '1' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '1' (type: string) + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 @@ -1827,7 +1935,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -1936,7 +2044,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_12.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_12.q.out index 9928a60..9072803 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_12.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_12.q.out @@ -62,6 +62,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -129,7 +130,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -207,6 +208,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 @@ -254,6 +263,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -399,6 +409,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out index 49ff635..3bc00c8 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out @@ -112,7 +112,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -135,7 +135,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -292,7 +292,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -315,7 +315,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out index 6c41171..5bf8959 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out @@ -42,6 +42,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -73,6 +75,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), '1' (type: string) + outputColumnNames: key, value1, value2, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +107,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY @@ -157,6 +211,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -193,6 +248,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY @@ -273,6 +335,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -304,6 +368,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), '2' (type: string) + outputColumnNames: key, value1, value2, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + keys: '2' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -320,3 +400,39 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2' (type: string) + sort order: + + Map-reduce partition columns: '2' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '2' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '2' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out index c0fdfd3..0ceb6a5 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out @@ -42,6 +42,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -78,6 +79,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -108,6 +116,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -139,6 +149,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -155,6 +181,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -185,6 +247,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -215,6 +279,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -231,6 +311,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -261,6 +377,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -292,6 +410,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -308,6 +442,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -338,6 +508,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -369,6 +541,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -385,6 +573,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -415,6 +639,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -445,6 +671,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -461,3 +703,39 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_22.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_22.q.out index 36e8792..8bd0fd6 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_22.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_22.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT * FROM test_table1 PREHOOK: type: QUERY @@ -180,6 +188,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -214,6 +223,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT * FROM test_table1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out index 82f5804..20d51db 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out @@ -620,6 +620,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -648,6 +649,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -671,6 +693,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out b/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out index 40c0ec5..5754a74 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out +++ b/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out @@ -101,7 +101,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 1 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out b/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out index 0740df3..5dc2426 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out +++ b/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out @@ -75,7 +75,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -184,7 +184,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out b/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out index fc5066c..28501ab 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out +++ b/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out @@ -111,7 +111,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -158,7 +158,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -259,7 +259,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -308,7 +308,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/stats0.q.out b/ql/src/test/results/clientpositive/stats0.q.out index 01e00c8..4a9cf36 100644 --- a/ql/src/test/results/clientpositive/stats0.q.out +++ b/ql/src/test/results/clientpositive/stats0.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -63,6 +64,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -114,6 +131,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -147,6 +193,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_non_partitioned + Is Table Level Stats: true + PREHOOK: query: insert overwrite table stats_non_partitioned select * from src PREHOOK: type: QUERY @@ -697,6 +751,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -717,6 +772,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.stats_partitioned + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -733,6 +822,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_partitioned + PREHOOK: query: insert overwrite table stats_partitioned partition (ds='1') select * from src PREHOOK: type: QUERY @@ -1332,6 +1428,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1380,6 +1477,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1431,6 +1544,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1473,6 +1615,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_non_partitioned + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -2205,6 +2355,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -2228,6 +2379,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.stats_partitioned + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -2253,6 +2438,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_partitioned + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/stats1.q.out b/ql/src/test/results/clientpositive/stats1.q.out index 2d5b4f8..e56f8d3 100644 --- a/ql/src/test/results/clientpositive/stats1.q.out +++ b/ql/src/test/results/clientpositive/stats1.q.out @@ -25,6 +25,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -75,6 +76,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: s2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -92,6 +106,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -106,6 +146,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable + PREHOOK: query: INSERT OVERWRITE TABLE tmptable SELECT unionsrc.key, unionsrc.value FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1 @@ -178,7 +225,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 26 rawDataSize 199 diff --git a/ql/src/test/results/clientpositive/stats10.q.out b/ql/src/test/results/clientpositive/stats10.q.out index d1fe473..9e95d04 100644 --- a/ql/src/test/results/clientpositive/stats10.q.out +++ b/ql/src/test/results/clientpositive/stats10.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,6 +50,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket3_1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -64,6 +82,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket3_1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src PREHOOK: type: QUERY @@ -414,7 +468,7 @@ Database: default Table: bucket3_1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -453,7 +507,7 @@ Database: default Table: bucket3_1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/stats14.q.out b/ql/src/test/results/clientpositive/stats14.q.out index 8501746..1cd660c 100644 --- a/ql/src/test/results/clientpositive/stats14.q.out +++ b/ql/src/test/results/clientpositive/stats14.q.out @@ -42,7 +42,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -186,7 +186,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -226,7 +226,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/stats15.q.out b/ql/src/test/results/clientpositive/stats15.q.out index 8501746..1cd660c 100644 --- a/ql/src/test/results/clientpositive/stats15.q.out +++ b/ql/src/test/results/clientpositive/stats15.q.out @@ -42,7 +42,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -186,7 +186,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -226,7 +226,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/stats18.q.out b/ql/src/test/results/clientpositive/stats18.q.out index 4945808..de8918a 100644 --- a/ql/src/test/results/clientpositive/stats18.q.out +++ b/ql/src/test/results/clientpositive/stats18.q.out @@ -39,7 +39,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/stats4.q.out b/ql/src/test/results/clientpositive/stats4.q.out index 8f503a9..e12c351 100644 --- a/ql/src/test/results/clientpositive/stats4.q.out +++ b/ql/src/test/results/clientpositive/stats4.q.out @@ -48,16 +48,14 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-3, Stage-9, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-1 depends on stages: Stage-2 Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -81,6 +79,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -96,6 +110,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-12-31' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -122,6 +170,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part1 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2 + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -152,15 +214,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -181,31 +234,30 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### PREHOOK: query: from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' @@ -2305,7 +2357,7 @@ Database: default Table: nzhang_part1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -2345,7 +2397,7 @@ Database: default Table: nzhang_part1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -2385,7 +2437,7 @@ Database: default Table: nzhang_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -2425,7 +2477,7 @@ Database: default Table: nzhang_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out b/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out index b855b38..3cafd54 100644 --- a/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out +++ b/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -42,6 +43,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, part + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,6 +102,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.tmptable + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/stats_list_bucket.q.out b/ql/src/test/results/clientpositive/stats_list_bucket.q.out index 0c43b1b..e7b7b26 100644 --- a/ql/src/test/results/clientpositive/stats_list_bucket.q.out +++ b/ql/src/test/results/clientpositive/stats_list_bucket.q.out @@ -61,7 +61,7 @@ Database: default Table: stats_list_bucket #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 @@ -133,7 +133,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 diff --git a/ql/src/test/results/clientpositive/stats_only_null.q.out b/ql/src/test/results/clientpositive/stats_only_null.q.out index 88c2114..c537da5 100644 --- a/ql/src/test/results/clientpositive/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/stats_only_null.q.out @@ -73,46 +73,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_null - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -123,46 +89,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_null_part - Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink diff --git a/ql/src/test/results/clientpositive/stats_partial_size.q.out b/ql/src/test/results/clientpositive/stats_partial_size.q.out index c779741..f9fe2f9 100644 --- a/ql/src/test/results/clientpositive/stats_partial_size.q.out +++ b/ql/src/test/results/clientpositive/stats_partial_size.q.out @@ -47,16 +47,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: sample_partitioned - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: x (type: int), y (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) TableScan alias: sample @@ -81,10 +81,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/statsfs.q.out b/ql/src/test/results/clientpositive/statsfs.q.out index d070e9a..16da734 100644 --- a/ql/src/test/results/clientpositive/statsfs.q.out +++ b/ql/src/test/results/clientpositive/statsfs.q.out @@ -176,7 +176,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -215,7 +215,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -343,7 +343,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -426,7 +426,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -466,7 +466,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/subquery_multiinsert.q.out b/ql/src/test/results/clientpositive/subquery_multiinsert.q.out index b8923d8..de11c6b 100644 --- a/ql/src/test/results/clientpositive/subquery_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/subquery_multiinsert.q.out @@ -58,18 +58,21 @@ INSERT OVERWRITE TABLE src_5 order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-2 depends on stages: Stage-10 + Stage-11 is a root stage + Stage-2 depends on stages: Stage-11 Stage-3 depends on stages: Stage-2 Stage-4 depends on stages: Stage-3 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 + Stage-12 depends on stages: Stage-5, Stage-7, Stage-8 + Stage-13 depends on stages: Stage-5, Stage-7, Stage-8 Stage-6 depends on stages: Stage-2 Stage-0 depends on stages: Stage-6 Stage-7 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -221,6 +224,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -235,6 +258,20 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_4 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_5 + Stage: Stage-6 Map Reduce Map Operator Tree: @@ -281,6 +318,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -295,6 +347,28 @@ STAGE PLANS: Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: from src b INSERT OVERWRITE TABLE src_4 @@ -332,15 +406,18 @@ POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, c POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-10:MAPRED +RUN: Stage-11:MAPRED RUN: Stage-2:MAPRED RUN: Stage-3:MAPRED RUN: Stage-6:MAPRED RUN: Stage-4:MAPRED RUN: Stage-0:MOVE +RUN: Stage-8:MAPRED RUN: Stage-1:MOVE RUN: Stage-7:STATS RUN: Stage-5:STATS +RUN: Stage-12:COLUMNSTATS +RUN: Stage-13:COLUMNSTATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4 @@ -487,7 +564,7 @@ POSTHOOK: Input: default@src_5 199 val_199 199 val_199 2 val_2 -Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-13:MAPRED' is a cross product +Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-14:MAPRED' is a cross product Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain from src b @@ -518,22 +595,25 @@ INSERT OVERWRITE TABLE src_5 order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-14 depends on stages: Stage-10 , consists of Stage-17, Stage-2 - Stage-17 has a backup stage: Stage-2 - Stage-13 depends on stages: Stage-17 - Stage-15 depends on stages: Stage-2, Stage-13 - Stage-4 depends on stages: Stage-15 + Stage-11 is a root stage + Stage-15 depends on stages: Stage-11 , consists of Stage-18, Stage-2 + Stage-18 has a backup stage: Stage-2 + Stage-14 depends on stages: Stage-18 + Stage-16 depends on stages: Stage-2, Stage-14 + Stage-4 depends on stages: Stage-16 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 - Stage-16 depends on stages: Stage-2, Stage-13 - Stage-12 depends on stages: Stage-16 - Stage-0 depends on stages: Stage-12 + Stage-19 depends on stages: Stage-5, Stage-7, Stage-8 + Stage-20 depends on stages: Stage-5, Stage-7, Stage-8 + Stage-17 depends on stages: Stage-2, Stage-14 + Stage-13 depends on stages: Stage-17 + Stage-0 depends on stages: Stage-13 Stage-7 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-13 Stage-2 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -576,10 +656,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-14 + Stage: Stage-15 Conditional Operator - Stage: Stage-17 + Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -593,7 +673,7 @@ STAGE PLANS: 0 1 - Stage: Stage-13 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -625,7 +705,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-15 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: sq_2:s1 @@ -687,6 +767,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -701,7 +801,21 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator - Stage: Stage-16 + Stage: Stage-19 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_4 + + Stage: Stage-20 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_5 + + Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: sq_1:a @@ -729,7 +843,7 @@ STAGE PLANS: 0 key (type: string), value (type: string) 1 _col0 (type: string), _col1 (type: string) - Stage: Stage-12 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan @@ -749,6 +863,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -765,6 +894,28 @@ STAGE PLANS: Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 Map Reduce Map Operator Tree: @@ -804,7 +955,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-13:MAPRED' is a cross product +Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-14:MAPRED' is a cross product Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: from src b INSERT OVERWRITE TABLE src_4 @@ -842,18 +993,21 @@ POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, c POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-10:MAPRED -RUN: Stage-14:CONDITIONAL -RUN: Stage-17:MAPREDLOCAL -RUN: Stage-13:MAPRED -RUN: Stage-15:MAPREDLOCAL +RUN: Stage-11:MAPRED +RUN: Stage-15:CONDITIONAL +RUN: Stage-18:MAPREDLOCAL +RUN: Stage-14:MAPRED RUN: Stage-16:MAPREDLOCAL +RUN: Stage-17:MAPREDLOCAL RUN: Stage-4:MAPRED -RUN: Stage-12:MAPRED +RUN: Stage-13:MAPRED RUN: Stage-1:MOVE RUN: Stage-0:MOVE +RUN: Stage-8:MAPRED RUN: Stage-5:STATS RUN: Stage-7:STATS +RUN: Stage-19:COLUMNSTATS +RUN: Stage-20:COLUMNSTATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4 diff --git a/ql/src/test/results/clientpositive/truncate_column.q.out b/ql/src/test/results/clientpositive/truncate_column.q.out index cc3bc89..7ff963c 100644 --- a/ql/src/test/results/clientpositive/truncate_column.q.out +++ b/ql/src/test/results/clientpositive/truncate_column.q.out @@ -36,7 +36,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 10 rawDataSize 94 @@ -297,7 +297,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} #### A masked pattern was here #### numFiles 1 numRows 10 @@ -495,7 +495,7 @@ Database: default Table: test_tab_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 10 rawDataSize 94 diff --git a/ql/src/test/results/clientpositive/udf1.q.out b/ql/src/test/results/clientpositive/udf1.q.out index eebd90f..4037430 100644 --- a/ql/src/test/results/clientpositive/udf1.q.out +++ b/ql/src/test/results/clientpositive/udf1.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -64,6 +65,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: boolean), _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: boolean) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16), compute_stats(c10, 16), compute_stats(c11, 16), compute_stats(c12, 16), compute_stats(c13, 16), compute_stats(c14, 16), compute_stats(c15, 16), compute_stats(c16, 16), compute_stats(c17, 16), compute_stats(c18, 16), compute_stats(c19, 16), compute_stats(c20, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 9840 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 9840 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: struct), _col16 (type: struct), _col17 (type: struct), _col18 (type: struct), _col19 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8), compute_stats(VALUE._col9), compute_stats(VALUE._col10), compute_stats(VALUE._col11), compute_stats(VALUE._col12), compute_stats(VALUE._col13), compute_stats(VALUE._col14), compute_stats(VALUE._col15), compute_stats(VALUE._col16), compute_stats(VALUE._col17), compute_stats(VALUE._col18), compute_stats(VALUE._col19) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 9840 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 9840 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -87,6 +114,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20 + Column Types: string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/udf3.q.out b/ql/src/test/results/clientpositive/udf3.q.out index 96038f1..56b5051 100644 --- a/ql/src/test/results/clientpositive/udf3.q.out +++ b/ql/src/test/results/clientpositive/udf3.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -43,6 +44,7 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE +<<<<<<< HEAD Select Operator expressions: UDFToString(_col0) (type: string), UDFToString(_col1) (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string), UDFToString(_col4) (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -55,6 +57,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 +======= + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: int), _col4 (type: int) + outputColumnNames: c1, c2, c3, c4, c5 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +>>>>>>> out Stage: Stage-0 Move Operator @@ -69,6 +101,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5 + Column Types: string, string, string, string, string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(CAST('' AS INT)), sum(CAST('' AS INT)), avg(CAST('' AS INT)), min(CAST('' AS INT)), max(CAST('' AS INT)) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/udf_10_trims.q.out b/ql/src/test/results/clientpositive/udf_10_trims.q.out index 3a5303a..746a2ae 100644 --- a/ql/src/test/results/clientpositive/udf_10_trims.q.out +++ b/ql/src/test/results/clientpositive/udf_10_trims.q.out @@ -24,6 +24,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -50,6 +51,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -73,6 +100,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/udf_character_length.q.out b/ql/src/test/results/clientpositive/udf_character_length.q.out index 332ec95..a85225f 100644 --- a/ql/src/test/results/clientpositive/udf_character_length.q.out +++ b/ql/src/test/results/clientpositive/udf_character_length.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -71,6 +72,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -94,6 +121,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/udf_length.q.out b/ql/src/test/results/clientpositive/udf_length.q.out index fc795bb..0da98e2 100644 --- a/ql/src/test/results/clientpositive/udf_length.q.out +++ b/ql/src/test/results/clientpositive/udf_length.q.out @@ -31,6 +31,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -54,6 +55,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -77,6 +104,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/udf_octet_length.q.out b/ql/src/test/results/clientpositive/udf_octet_length.q.out index f8738f8..af648fb 100644 --- a/ql/src/test/results/clientpositive/udf_octet_length.q.out +++ b/ql/src/test/results/clientpositive/udf_octet_length.q.out @@ -31,6 +31,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -54,6 +55,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -77,6 +104,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/udf_reverse.q.out b/ql/src/test/results/clientpositive/udf_reverse.q.out index 28b0c9f..9257b2c 100644 --- a/ql/src/test/results/clientpositive/udf_reverse.q.out +++ b/ql/src/test/results/clientpositive/udf_reverse.q.out @@ -31,6 +31,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -54,6 +55,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -77,6 +104,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union10.q.out b/ql/src/test/results/clientpositive/union10.q.out index 85eabf5..5337e53 100644 --- a/ql/src/test/results/clientpositive/union10.q.out +++ b/ql/src/test/results/clientpositive/union10.q.out @@ -29,6 +29,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -88,6 +89,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -103,6 +117,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -118,6 +145,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -141,6 +194,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union12.q.out b/ql/src/test/results/clientpositive/union12.q.out index 1b02d16..30654a9 100644 --- a/ql/src/test/results/clientpositive/union12.q.out +++ b/ql/src/test/results/clientpositive/union12.q.out @@ -29,6 +29,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -88,6 +89,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -103,6 +117,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -118,6 +145,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -141,6 +194,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union17.q.out b/ql/src/test/results/clientpositive/union17.q.out index 18e06e1..db2ee12 100644 --- a/ql/src/test/results/clientpositive/union17.q.out +++ b/ql/src/test/results/clientpositive/union17.q.out @@ -33,9 +33,13 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8 Stage-5 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-2 @@ -140,6 +144,7 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL +<<<<<<< HEAD Select Operator expressions: _col0 (type: string), UDFToString(_col1) (type: string) outputColumnNames: _col0, _col1 @@ -152,6 +157,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 +======= + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +>>>>>>> out Stage: Stage-0 Move Operator @@ -166,11 +196,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -182,6 +248,7 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL +<<<<<<< HEAD Select Operator expressions: _col0 (type: string), _col1 (type: string), UDFToString(_col2) (type: string) outputColumnNames: _col0, _col1, _col2 @@ -194,6 +261,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 +======= + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +>>>>>>> out Stage: Stage-1 Move Operator @@ -205,9 +297,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2) unionsrc diff --git a/ql/src/test/results/clientpositive/union18.q.out b/ql/src/test/results/clientpositive/union18.q.out index caa664c..2a78eaa 100644 --- a/ql/src/test/results/clientpositive/union18.q.out +++ b/ql/src/test/results/clientpositive/union18.q.out @@ -35,16 +35,14 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-13 depends on stages: Stage-4, Stage-10, Stage-11 + Stage-14 depends on stages: Stage-4, Stage-10, Stage-11 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-1 depends on stages: Stage-12, Stage-11, Stage-14 + Stage-1 depends on stages: Stage-3 Stage-10 depends on stages: Stage-1 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-11 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -96,6 +94,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL @@ -107,6 +118,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: s2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -125,6 +151,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL @@ -136,6 +175,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -159,6 +226,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: @@ -189,15 +270,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -215,31 +287,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git a/ql/src/test/results/clientpositive/union19.q.out b/ql/src/test/results/clientpositive/union19.q.out index 5ce5905..a002275 100644 --- a/ql/src/test/results/clientpositive/union19.q.out +++ b/ql/src/test/results/clientpositive/union19.q.out @@ -33,8 +33,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -101,6 +105,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: s2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -134,6 +153,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -153,6 +187,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -167,6 +216,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -177,9 +262,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2) unionsrc diff --git a/ql/src/test/results/clientpositive/union22.q.out b/ql/src/test/results/clientpositive/union22.q.out index f60dbf6..3106201 100644 --- a/ql/src/test/results/clientpositive/union22.q.out +++ b/ql/src/test/results/clientpositive/union22.q.out @@ -77,6 +77,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-4, Stage-6 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 STAGE PLANS: @@ -97,7 +98,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -225,7 +226,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k1":"true","k2":"true","k3":"true","k4":"true"}} bucket_count -1 column.name.delimiter , columns k1,k2,k3,k4 @@ -272,7 +273,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -361,6 +362,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string) + outputColumnNames: k1, k2, k3, k4, ds + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16), compute_stats(k3, 16), compute_stats(k4, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false TableScan GatherStats: false Union @@ -395,6 +415,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string) + outputColumnNames: k1, k2, k3, k4, ds + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16), compute_stats(k3, 16), compute_stats(k4, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -428,7 +467,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -470,6 +509,40 @@ STAGE PLANS: Truncated Path -> Alias: /dst_union22_delta/ds=1 [null-subquery1:$hdt$_0-subquery1:dst_union22_delta] #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -502,6 +575,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: k1, k2, k3, k4 + Column Types: string, string, string, string + Table: default.dst_union22 + Is Table Level Stats: false + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -558,7 +639,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k1":"true","k2":"true","k3":"true","k4":"true"}} bucket_count -1 column.name.delimiter , columns k1,k2,k3,k4 @@ -605,7 +686,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 diff --git a/ql/src/test/results/clientpositive/union28.q.out b/ql/src/test/results/clientpositive/union28.q.out index c3789d0..0d9ff2f 100644 --- a/ql/src/test/results/clientpositive/union28.q.out +++ b/ql/src/test/results/clientpositive/union28.q.out @@ -39,6 +39,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -102,6 +103,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE @@ -117,6 +131,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE @@ -132,6 +159,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -155,6 +208,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union29.q.out b/ql/src/test/results/clientpositive/union29.q.out index 87ba275..db92f3c 100644 --- a/ql/src/test/results/clientpositive/union29.q.out +++ b/ql/src/test/results/clientpositive/union29.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -67,6 +68,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -88,6 +102,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -109,6 +136,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -132,6 +185,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union30.q.out b/ql/src/test/results/clientpositive/union30.q.out index 26a27c8..d033a51 100644 --- a/ql/src/test/results/clientpositive/union30.q.out +++ b/ql/src/test/results/clientpositive/union30.q.out @@ -53,6 +53,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -116,6 +117,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE @@ -131,6 +145,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE @@ -146,6 +173,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -167,6 +207,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -190,6 +256,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union31.q.out b/ql/src/test/results/clientpositive/union31.q.out index 4c26c7b..b04b1ea 100644 --- a/ql/src/test/results/clientpositive/union31.q.out +++ b/ql/src/test/results/clientpositive/union31.q.out @@ -72,9 +72,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-10 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -181,6 +185,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -195,11 +214,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.t3 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: value, cnt + Column Types: string, int + Table: default.t4 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) @@ -224,6 +279,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: value, cnt + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -235,9 +305,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t4 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from (select * from t1 union all @@ -340,12 +432,16 @@ insert overwrite table t6 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2, Stage-6 + Stage-3 depends on stages: Stage-2, Stage-8 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 - Stage-6 is a root stage + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 + Stage-8 is a root stage STAGE PLANS: Stage: Stage-2 @@ -426,6 +522,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t5 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -444,6 +555,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -458,6 +584,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t5 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t6 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -468,10 +630,32 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t6 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator - Stage: Stage-6 + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -654,8 +838,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -752,6 +940,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t7 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(1) keys: KEY._col0 (type: string) @@ -770,6 +973,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t8 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -784,6 +1002,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t7 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t8 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -794,9 +1048,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t8 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from ( select key as c1, count(1) as cnt from t1 group by key diff --git a/ql/src/test/results/clientpositive/union33.q.out b/ql/src/test/results/clientpositive/union33.q.out index 17aeecd..e9ad715 100644 --- a/ql/src/test/results/clientpositive/union33.q.out +++ b/ql/src/test/results/clientpositive/union33.q.out @@ -32,6 +32,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -124,6 +125,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -135,6 +149,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -158,6 +198,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_src + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -247,6 +294,7 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-4 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 @@ -330,6 +378,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -350,6 +411,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -373,6 +460,13 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_src + Stage: Stage-5 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union4.q.out b/ql/src/test/results/clientpositive/union4.q.out index 0821589..8524d72 100644 --- a/ql/src/test/results/clientpositive/union4.q.out +++ b/ql/src/test/results/clientpositive/union4.q.out @@ -25,6 +25,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -83,6 +84,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE @@ -98,6 +112,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -121,6 +161,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union6.q.out b/ql/src/test/results/clientpositive/union6.q.out index 8448d86..53ffe5a 100644 --- a/ql/src/test/results/clientpositive/union6.q.out +++ b/ql/src/test/results/clientpositive/union6.q.out @@ -25,6 +25,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -78,6 +79,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: s2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -95,6 +109,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -118,6 +158,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union_lateralview.q.out b/ql/src/test/results/clientpositive/union_lateralview.q.out index 7b89e50..4c65003 100644 --- a/ql/src/test/results/clientpositive/union_lateralview.q.out +++ b/ql/src/test/results/clientpositive/union_lateralview.q.out @@ -48,6 +48,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -178,6 +180,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_union_lateral_view + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, arr_ele, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(arr_ele, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -192,6 +209,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, arr_ele, value + Column Types: int, int, string + Table: default.test_union_lateral_view + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_union_lateral_view SELECT b.key, d.arr_ele, d.value FROM ( diff --git a/ql/src/test/results/clientpositive/union_stats.q.out b/ql/src/test/results/clientpositive/union_stats.q.out index 9ea0b51..08ac9ee 100644 --- a/ql/src/test/results/clientpositive/union_stats.q.out +++ b/ql/src/test/results/clientpositive/union_stats.q.out @@ -487,7 +487,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1000 rawDataSize 10624 @@ -522,7 +522,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1000 rawDataSize 10624 diff --git a/ql/src/test/results/clientpositive/vector_bucket.q.out b/ql/src/test/results/clientpositive/vector_bucket.q.out index 0eeb8a5..5166746 100644 --- a/ql/src/test/results/clientpositive/vector_bucket.q.out +++ b/ql/src/test/results/clientpositive/vector_bucket.q.out @@ -20,6 +20,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +77,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.non_orc_table + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +110,47 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: int, string + Table: default.non_orc_table + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: select a, b from non_orc_table order by a PREHOOK: type: QUERY PREHOOK: Input: default@non_orc_table diff --git a/ql/src/test/results/clientpositive/vector_char_4.q.out b/ql/src/test/results/clientpositive/vector_char_4.q.out index 1c58fd2..3464637 100644 --- a/ql/src/test/results/clientpositive/vector_char_4.q.out +++ b/ql/src/test/results/clientpositive/vector_char_4.q.out @@ -137,6 +137,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -148,38 +149,58 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] - selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.char_lazy_binary_columnar - Execution mode: vectorized + Select Operator + expressions: _col0 (type: char(10)), _col1 (type: char(10)), _col2 (type: char(20)), _col3 (type: char(30)), _col4 (type: char(20)), _col5 (type: char(20)), _col6 (type: char(50)) + outputColumnNames: ct, csi, ci, cb, cf, cd, cs + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ct, 16), compute_stats(csi, 16), compute_stats(ci, 16), compute_stats(cb, 16), compute_stats(cf, 16), compute_stats(cd, 16), compute_stats(cs, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -203,6 +224,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: ct, csi, ci, cb, cf, cd, cs + Column Types: char(10), char(10), char(20), char(30), char(20), char(20), char(50) + Table: default.char_lazy_binary_columnar + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/vector_char_simple.q.out b/ql/src/test/results/clientpositive/vector_char_simple.q.out index c7b00fd..36c357e 100644 --- a/ql/src/test/results/clientpositive/vector_char_simple.q.out +++ b/ql/src/test/results/clientpositive/vector_char_simple.q.out @@ -212,6 +212,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -228,8 +230,8 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Execution mode: vectorized Map Vectorization: enabled: true @@ -244,11 +246,32 @@ STAGE PLANS: enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: + Group By Vectorization: + vectorOutput: false + native: false Stage: Stage-0 Stage: Stage-2 + Stage: Stage-4 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Vectorization: + vectorOutput: false + native: false + PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git a/ql/src/test/results/clientpositive/vector_groupby4.q.out b/ql/src/test/results/clientpositive/vector_groupby4.q.out index 799797d..2048ab0 100644 --- a/ql/src/test/results/clientpositive/vector_groupby4.q.out +++ b/ql/src/test/results/clientpositive/vector_groupby4.q.out @@ -35,6 +35,9 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -143,6 +146,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -157,6 +170,81 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string), 16 (type: int) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/vector_groupby6.q.out b/ql/src/test/results/clientpositive/vector_groupby6.q.out index 6fee467..686e2a1 100644 --- a/ql/src/test/results/clientpositive/vector_groupby6.q.out +++ b/ql/src/test/results/clientpositive/vector_groupby6.q.out @@ -35,6 +35,9 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -143,6 +146,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -157,6 +170,81 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string), 16 (type: int) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/vector_multi_insert.q.out b/ql/src/test/results/clientpositive/vector_multi_insert.q.out index 4013cd4..c44a10f 100644 --- a/ql/src/test/results/clientpositive/vector_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/vector_multi_insert.q.out @@ -85,23 +85,18 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-14 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 + Stage-15 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 + Stage-16 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-1 depends on stages: Stage-12, Stage-11, Stage-14 + Stage-1 depends on stages: Stage-3 Stage-10 depends on stages: Stage-1 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 - Stage-21 depends on stages: Stage-3 , consists of Stage-18, Stage-17, Stage-19 - Stage-18 - Stage-2 depends on stages: Stage-18, Stage-17, Stage-20 - Stage-16 depends on stages: Stage-2 - Stage-17 - Stage-19 - Stage-20 depends on stages: Stage-19 + Stage-11 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-3 + Stage-12 depends on stages: Stage-2 + Stage-13 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -125,6 +120,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Filter Operator predicate: ((rn >= 100) and (rn < 1000)) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -140,6 +148,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn2 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (rn >= 1000) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -155,15 +178,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn3 - Execution mode: vectorized + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -187,6 +239,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn1 + + Stage: Stage-15 + Column Stats Work + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn2 + + Stage: Stage-16 + Column Stats Work + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn3 + Stage: Stage-5 Map Reduce Map Operator Tree: @@ -217,15 +290,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -243,40 +307,33 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-21 - Conditional Operator - - Stage: Stage-18 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col0] not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -288,38 +345,40 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn3 - Stage: Stage-16 + Stage: Stage-12 Stats-Aggr Operator - Stage: Stage-17 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn3 - - Stage: Stage-19 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn3 - - Stage: Stage-20 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col0] not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from orc1 a insert overwrite table orc_rn1 select a.* where a.rn < 100 diff --git a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out index 38f13da..6b98dd2 100644 --- a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out +++ b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out @@ -96,6 +96,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -108,38 +109,58 @@ STAGE PLANS: alias: alltypesorc Row Limit Per Split: 1 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: 17.29 (type: decimal(18,9)) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [12] - selectExpressions: ConstantVectorExpression(val 17.29) -> 12:decimal(18,9) Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.decimal_2 - Execution mode: vectorized + Select Operator + expressions: 17.29 (type: decimal(18,9)) + outputColumnNames: t + Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(t, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 684 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 684 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -163,6 +184,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: t + Column Types: decimal(18,9) + Table: default.decimal_2 + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/vector_udf_character_length.q.out b/ql/src/test/results/clientpositive/vector_udf_character_length.q.out index 81d801c..601ed7a 100644 --- a/ql/src/test/results/clientpositive/vector_udf_character_length.q.out +++ b/ql/src/test/results/clientpositive/vector_udf_character_length.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -71,7 +72,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Execution mode: vectorized + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -95,6 +121,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/vector_udf_octet_length.q.out b/ql/src/test/results/clientpositive/vector_udf_octet_length.q.out index c71cfef..08844a7 100644 --- a/ql/src/test/results/clientpositive/vector_udf_octet_length.q.out +++ b/ql/src/test/results/clientpositive/vector_udf_octet_length.q.out @@ -31,6 +31,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -54,7 +55,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Execution mode: vectorized + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -78,6 +104,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/vector_varchar_4.q.out b/ql/src/test/results/clientpositive/vector_varchar_4.q.out index 205c67a..45e1322 100644 --- a/ql/src/test/results/clientpositive/vector_varchar_4.q.out +++ b/ql/src/test/results/clientpositive/vector_varchar_4.q.out @@ -137,6 +137,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -148,38 +149,58 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] - selectExpressions: CastLongToVarChar(col 0, maxLength 10) -> 13:VarChar, CastLongToVarChar(col 1, maxLength 10) -> 14:VarChar, CastLongToVarChar(col 2, maxLength 20) -> 15:VarChar, CastLongToVarChar(col 3, maxLength 30) -> 16:VarChar, VectorUDFAdaptor(CAST( f AS varchar(20))) -> 17:varchar(20), VectorUDFAdaptor(CAST( d AS varchar(20))) -> 18:varchar(20), CastStringGroupToVarChar(col 8, maxLength 50) -> 19:VarChar Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.varchar_lazy_binary_columnar - Execution mode: vectorized + Select Operator + expressions: _col0 (type: varchar(10)), _col1 (type: varchar(10)), _col2 (type: varchar(20)), _col3 (type: varchar(30)), _col4 (type: varchar(20)), _col5 (type: varchar(20)), _col6 (type: varchar(50)) + outputColumnNames: vt, vsi, vi, vb, vf, vd, vs + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(vt, 16), compute_stats(vsi, 16), compute_stats(vi, 16), compute_stats(vb, 16), compute_stats(vf, 16), compute_stats(vd, 16), compute_stats(vs, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -203,6 +224,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: vt, vsi, vi, vb, vf, vd, vs + Column Types: varchar(10), varchar(10), varchar(20), varchar(30), varchar(20), varchar(20), varchar(50) + Table: default.varchar_lazy_binary_columnar + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/vector_varchar_simple.q.out b/ql/src/test/results/clientpositive/vector_varchar_simple.q.out index 1bed203..7b81611 100644 --- a/ql/src/test/results/clientpositive/vector_varchar_simple.q.out +++ b/ql/src/test/results/clientpositive/vector_varchar_simple.q.out @@ -276,6 +276,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -306,10 +308,9 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) Execution mode: vectorized Map Vectorization: @@ -344,6 +345,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.varchar_3 + Select Operator + expressions: _col0 (type: varchar(25)) + outputColumnNames: field + Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(field, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -358,6 +378,47 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: field + Column Types: varchar(25) + Table: default.varchar_3 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git a/ql/src/test/results/clientpositive/vectorized_context.q.out b/ql/src/test/results/clientpositive/vectorized_context.q.out index dac9b6b..09efe09 100644 --- a/ql/src/test/results/clientpositive/vectorized_context.q.out +++ b/ql/src/test/results/clientpositive/vectorized_context.q.out @@ -109,29 +109,14 @@ STAGE PLANS: Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:store_sales + $hdt$_1:household_demographics Fetch Operator limit: -1 - $hdt$_2:household_demographics + $hdt$_2:store Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:store_sales - TableScan - alias: store_sales - Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_store_sk is not null and ss_hdemo_sk is not null) (type: boolean) - Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_store_sk (type: int), ss_hdemo_sk (type: int), ss_net_profit (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - $hdt$_2:household_demographics + $hdt$_1:household_demographics TableScan alias: household_demographics Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE @@ -146,10 +131,7 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: + $hdt$_2:store TableScan alias: store Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE @@ -160,24 +142,42 @@ STAGE PLANS: expressions: s_store_sk (type: int), s_city (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_store_sk is not null and ss_hdemo_sk is not null) (type: boolean) + Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_store_sk (type: int), ss_hdemo_sk (type: int), ss_net_profit (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4 + outputColumnNames: _col0, _col2 Statistics: Num rows: 6682 Data size: 80009 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + outputColumnNames: _col2, _col5 Statistics: Num rows: 7350 Data size: 88009 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col2 (type: double) + expressions: _col5 (type: string), _col2 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 7350 Data size: 88009 Basic stats: COMPLETE Column stats: NONE Limit